BBS2chProxyConnection.cpp 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386
  1. #include <string>
  2. #include <vector>
  3. #include <map>
  4. #include <set>
  5. #include <sstream>
  6. #include <stdexcept>
  7. #include <algorithm>
  8. #include <pthread.h>
  9. #include <time.h>
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #include <unistd.h>
  13. #ifdef USE_LUA
  14. #include <lua.hpp>
  15. #endif
  16. #ifdef _WIN32
  17. #include <fcntl.h>
  18. #include <winsock2.h>
  19. #include <ws2tcpip.h>
  20. #include <mswsock.h>
  21. #define CLOSESOCKET(x) closesocket(x)
  22. #define gmtime_r(a, b) gmtime_s(b, a)
  23. #else
  24. #include <sys/socket.h>
  25. #include <netinet/in.h>
  26. #include <netdb.h>
  27. #include <arpa/inet.h>
  28. #include <poll.h>
  29. #define CLOSESOCKET(x) close(x)
  30. #endif
  31. #include "BBS2chProxyConnection.h"
  32. #include "DataStorage.h"
  33. #include "hmac.h"
  34. #include "stringEncodingConverter.h"
  35. #include "BBS2chProxyRawSocket.h"
  36. #ifdef USE_MITM
  37. #include "BBS2chProxySecureSocket.h"
  38. #endif
  39. //#define DEBUG 1
  40. extern char *proxy_server;
  41. extern long proxy_port;
  42. extern long proxy_type;
  43. extern long timeout;
  44. extern char *user_agent;
  45. extern char *api_ua_dat;
  46. extern char *x_2ch_ua_dat;
  47. extern char *appKey;
  48. extern char *hmacKey;
  49. extern int allow_chunked;
  50. extern int curl_features;
  51. extern unsigned int curl_version_number;
  52. extern bool accept_https;
  53. extern int force_5chnet;
  54. extern int force_5chnet_https;
  55. extern int force_ipv4;
  56. extern char *bbsmenu_url;
  57. extern char *api_server;
  58. extern std::map<std::string, std::string> bbscgi_headers;
  59. extern int gikofix;
  60. extern CURLSH *curl_share;
  61. extern char *lua_script;
  62. extern unsigned int api_mode;
  63. extern std::vector<std::string> bbscgi_postorder;
  64. extern unsigned int bbscgi_utf8;
  65. extern int api_override;
  66. #ifdef USE_MITM
  67. extern unsigned int mitm_mode;
  68. #endif
  69. extern void log_printf(int level, const char *format ...);
  70. #include "utils.h"
  71. BBS2chProxyKeyManager BBS2chProxyConnection::keyManager;
  72. BBS2chProxyAuth BBS2chProxyConnection::auth;
  73. static regex_t regex;
  74. static regex_t regex_kako;
  75. static regex_t regex_offlaw;
  76. static regex_t regex_api;
  77. static regex_t regex_api_auth;
  78. #ifdef USE_LUA
  79. extern "C" {
  80. static int lua_hmacSHA256(lua_State *l)
  81. {
  82. static const char *table = "0123456789abcdef";
  83. size_t keyLength, dataLength;
  84. const char *key = luaL_checklstring(l, 1, &keyLength);
  85. const char *data = luaL_checklstring(l, 2, &dataLength);
  86. if (!key || !data) return 0;
  87. unsigned char digest[32];
  88. char digestStr[65];
  89. proxy2ch_HMAC_SHA256(key, keyLength, data, dataLength, digest);
  90. for (int i=0; i<32; i++) {
  91. unsigned char c = digest[i];
  92. unsigned char upper = (c >> 4) & 0xf;
  93. unsigned char lower = c & 0xf;
  94. digestStr[i*2] = table[upper];
  95. digestStr[i*2+1] = table[lower];
  96. }
  97. digestStr[64] = 0;
  98. lua_pushstring(l, digestStr);
  99. return 1;
  100. }
  101. static int lua_decodeURIComponent(lua_State *l)
  102. {
  103. size_t length;
  104. const char *input = luaL_checklstring(l, 1, &length);
  105. if (!input) return 0;
  106. bool decodePlus = true;
  107. if (!lua_isnoneornil(l, 2)) {
  108. decodePlus = (lua_toboolean(l, 2));
  109. }
  110. std::string output = decodeURIComponent(input, length, decodePlus);
  111. lua_pushstring(l, output.c_str());
  112. return 1;
  113. }
  114. static int lua_encodeURIComponent(lua_State *l)
  115. {
  116. size_t length;
  117. const char *input = luaL_checklstring(l, 1, &length);
  118. if (!input) return 0;
  119. bool spaceAsPlus = true;
  120. if (!lua_isnoneornil(l, 2)) {
  121. spaceAsPlus = (lua_toboolean(l, 2));
  122. }
  123. std::string output = encodeURIComponent(input, length, spaceAsPlus);
  124. lua_pushstring(l, output.c_str());
  125. return 1;
  126. }
  127. static int lua_convertShiftJISToUTF8(lua_State *l)
  128. {
  129. size_t length;
  130. const char *input = luaL_checklstring(l, 1, &length);
  131. if (!input) return 0;
  132. if (length > 0) {
  133. char *output = convertShiftJISToUTF8(input, length);
  134. if (!output) lua_pushnil(l);
  135. else {
  136. lua_pushstring(l, output);
  137. free(output);
  138. }
  139. }
  140. else lua_pushstring(l, "");
  141. return 1;
  142. }
  143. static int lua_isExpiredKey(lua_State *l)
  144. {
  145. size_t length;
  146. const char *input = luaL_checklstring(l, 1, &length);
  147. if (!input) return 0;
  148. if (BBS2chProxyConnection::keyManager.isExpired(input)) {
  149. lua_pushboolean(l, 1);
  150. }
  151. else lua_pushboolean(l, 0);
  152. return 1;
  153. }
  154. static int lua_isValidAsUTF8(lua_State *l)
  155. {
  156. size_t length;
  157. const char *input = luaL_checklstring(l, 1, &length);
  158. if (!input) return 0;
  159. lua_pushboolean(l, isValidAsUTF8(input, length));
  160. return 1;
  161. }
  162. static int lua_getMonaKey(lua_State *l)
  163. {
  164. size_t length;
  165. const char *input = luaL_checklstring(l, 1, &length);
  166. if (!input) return 0;
  167. const std::string &key = BBS2chProxyConnection::keyManager.getKey(input);
  168. lua_pushstring(l, key.c_str());
  169. return 1;
  170. }
  171. }
  172. #endif
  173. void BBS2chProxyConnection::run(void * (*func)(void *))
  174. {
  175. pthread_t thread;
  176. pthread_attr_t thread_attr;
  177. pthread_attr_init(&thread_attr);
  178. pthread_attr_setdetachstate(&thread_attr , PTHREAD_CREATE_DETACHED);
  179. if(0 != pthread_create(&thread , &thread_attr , func , this))
  180. perror("pthread_create");
  181. pthread_attr_destroy(&thread_attr);
  182. }
  183. struct TunnelSockets {
  184. int sock_c;
  185. int sock_s;
  186. std::string addr;
  187. int port;
  188. };
  189. static void *tunnelMain(void *param)
  190. {
  191. TunnelSockets *sockets = (TunnelSockets *)param;
  192. char *buf = new char[16384];
  193. #ifdef _WIN32
  194. fd_set fds;
  195. int nfds = sockets->sock_c > sockets->sock_s ? sockets->sock_c + 1 : sockets->sock_s + 1;
  196. #else
  197. struct pollfd fds[2];
  198. memset(fds, 0, sizeof(fds));
  199. fds[0].fd = sockets->sock_c;
  200. fds[0].events = POLLIN;
  201. fds[1].fd = sockets->sock_s;
  202. fds[1].events = POLLIN;
  203. #endif
  204. while (1) {
  205. #ifdef _WIN32
  206. FD_ZERO(&fds);
  207. FD_SET(sockets->sock_c, &fds);
  208. FD_SET(sockets->sock_s, &fds);
  209. if (select(nfds, &fds, NULL, NULL, NULL) < 0) break;
  210. if (FD_ISSET(sockets->sock_c, &fds)) {
  211. int ret = recv(sockets->sock_c, buf, 16384, 0);
  212. if (ret > 0) send(sockets->sock_s, buf, ret, 0);
  213. else if (ret <= 0) break;
  214. }
  215. if (FD_ISSET(sockets->sock_s, &fds)) {
  216. int ret = recv(sockets->sock_s, buf, 16384, 0);
  217. if (ret > 0) send(sockets->sock_c, buf, ret, 0);
  218. else if (ret <= 0) break;
  219. }
  220. #else
  221. if (poll(fds, 2, -1) < 0) break;
  222. if (fds[0].revents & POLLIN) {
  223. int ret = recv(sockets->sock_c, buf, 16384, 0);
  224. if (ret > 0) send(sockets->sock_s, buf, ret, 0);
  225. else if (ret <= 0) break;
  226. }
  227. else if (fds[0].revents != 0) break;
  228. if (fds[1].revents & POLLIN) {
  229. int ret = recv(sockets->sock_s, buf, 16384, 0);
  230. if (ret > 0) send(sockets->sock_c, buf, ret, 0);
  231. else if (ret <= 0) break;
  232. }
  233. else if (fds[1].revents != 0) break;
  234. #endif
  235. }
  236. CLOSESOCKET(sockets->sock_c);
  237. CLOSESOCKET(sockets->sock_s);
  238. log_printf(1, "Finished tunneling to %s:%d\n", sockets->addr.c_str(), sockets->port);
  239. delete sockets;
  240. delete[] buf;
  241. return NULL;
  242. }
  243. int BBS2chProxyConnection::tunnel(const char *addr, int port)
  244. {
  245. struct sockaddr_in server;
  246. memset(&server, 0, sizeof(server));
  247. server.sin_family = AF_INET;
  248. server.sin_addr.s_addr = inet_addr(addr);
  249. server.sin_port = htons(port);
  250. if(server.sin_addr.s_addr == 0xffffffff) {
  251. struct hostent *host;
  252. host = gethostbyname(addr);
  253. if (host == NULL) {
  254. log_printf(0, "Failed to lookup hostname %s\n", addr);
  255. sendResponse(400, "Bad Request", socketToClient);
  256. return 400;
  257. }
  258. server.sin_addr.s_addr = *(unsigned int *)host->h_addr_list[0];
  259. }
  260. log_printf(1,"Tunneling connection to %s:%d\n",addr,port);
  261. int sock_s = socket(AF_INET, SOCK_STREAM, 0);
  262. if(-1 == ::connect(sock_s, (struct sockaddr *)&server, sizeof(server))) {
  263. perror("connect");
  264. sendResponse(400, "Bad Request", socketToClient);
  265. return 400;
  266. }
  267. send(sock_c, "HTTP/1.1 200 Connection established\r\n\r\n", 39, 0);
  268. TunnelSockets *sockets = new TunnelSockets();
  269. sockets->sock_c = sock_c;
  270. sockets->sock_s = sock_s;
  271. sockets->addr = addr;
  272. sockets->port = port;
  273. pthread_t thread;
  274. if(0 != pthread_create(&thread, NULL, tunnelMain, sockets))
  275. perror("pthread_create");
  276. pthread_detach(thread);
  277. return 0;
  278. }
  279. void BBS2chProxyConnection::connect(void)
  280. {
  281. char method[32], url[1024], protocol[32];
  282. int i;
  283. char *buf, *ptr;
  284. unsigned int datProxyMode = 0; // 0: no dat, 1: read.cgi or API, 2: force API, 3: kakolog
  285. regmatch_t match[7];
  286. long statusCode = 0;
  287. BBS2chProxyURL baseURL;
  288. BBS2chProxyHttpHeaders requestHeaders;
  289. socketToClient = new BBS2chProxyRawSocket(sock_c);
  290. buf = (char *)malloc(16384);
  291. if(!buf) goto end;
  292. beginHandleRequest:
  293. ptr = buf;
  294. if(!socketToClient->readLine(buf, 1024)) {
  295. sendResponse(400, "Bad Request", socketToClient);
  296. statusCode = 400;
  297. goto end;
  298. }
  299. i=0;
  300. while(*ptr != ' ' && *ptr != 0 && i < 32) method[i++] = *ptr++;
  301. if(*ptr == 0 || i == 32) {
  302. sendResponse(400, "Bad Request", socketToClient);
  303. statusCode = 400;
  304. goto end;
  305. }
  306. method[i] = 0;
  307. ptr++;
  308. i=0;
  309. while(*ptr != ' ' && *ptr != 0 && i < 1024) url[i++] = *ptr++;
  310. if(*ptr == 0 || i == 1024) {
  311. sendResponse(400, "Bad Request", socketToClient);
  312. statusCode = 400;
  313. goto end;
  314. }
  315. url[i] = 0;
  316. ptr++;
  317. i=0;
  318. while(*ptr != '\r' && *ptr != '\n' && *ptr != 0 && i < 32) protocol[i++] = *ptr++;
  319. if(*ptr == 0 || i == 32) {
  320. sendResponse(400, "Bad Request", socketToClient);
  321. statusCode = 400;
  322. goto end;
  323. }
  324. protocol[i] = 0;
  325. if(!strncasecmp(protocol,"HTTP/1.0",8)) {
  326. isClientHttp1_0 = true;
  327. }
  328. else isClientHttp1_0 = false;
  329. log_printf(1, "Received %s %s %s\n",method,url,protocol);
  330. if(strcasecmp(method,"GET") && strcasecmp(method,"POST") && strcasecmp(method,"HEAD") && strcasecmp(method,"CONNECT") && strcasecmp(method,"PUT") && strcasecmp(method, "OPTIONS")) {
  331. sendResponse(400, "Bad Request", socketToClient);
  332. statusCode = 400;
  333. goto end;
  334. }
  335. if(!url[0]) {
  336. sendResponse(400, "Bad Request", socketToClient);
  337. statusCode = 400;
  338. goto end;
  339. }
  340. if(strncasecmp(protocol,"HTTP",4)) {
  341. sendResponse(400, "Bad Request", socketToClient);
  342. statusCode = 400;
  343. goto end;
  344. }
  345. if(!strcasecmp(method,"CONNECT")) {
  346. if(!accept_https || baseURL.isValid()) {
  347. sendResponse(400, "Bad Request", socketToClient);
  348. statusCode = 400;
  349. goto end;
  350. }
  351. while(socketToClient->readLine(buf, 16384)) {
  352. if(!strcmp("\r\n",buf)) break;
  353. }
  354. int port = 443;
  355. char *ptr = strchr(url, ':');
  356. if(ptr) {
  357. *ptr = 0;
  358. port = atoi(ptr+1);
  359. }
  360. #ifdef USE_MITM
  361. bool useMITM = false;
  362. if (mitm_mode) {
  363. baseURL = BBS2chProxyURL("https", url);
  364. if (mitm_mode == 2) useMITM = true;
  365. else if (mitm_mode == 1 && baseURL.isFamilyOf5chNet()) useMITM = true;
  366. }
  367. if (useMITM) {
  368. socketToClient->writeString("HTTP/1.1 200 Connection established\r\n\r\n");
  369. if (port == 80) {
  370. baseURL.setScheme("http");
  371. goto beginHandleRequest;
  372. }
  373. else {
  374. try {
  375. BBS2chProxySecureSocket *secureSocket = new BBS2chProxySecureSocket(sock_c, url);
  376. delete socketToClient;
  377. socketToClient = secureSocket;
  378. if (port != 443) baseURL.setPort(port);
  379. isHttps = true;
  380. goto beginHandleRequest;
  381. } catch (const std::runtime_error& e) {
  382. log_printf(0, "%s\n", e.what());
  383. sendResponse(400, "Bad Request", socketToClient);
  384. statusCode = 400;
  385. goto end;
  386. }
  387. }
  388. }
  389. else
  390. #endif
  391. {
  392. statusCode = tunnel(url, port);
  393. /* if a return value is non-zero, tunnel function failed to establish a tunnelling connection */
  394. if (statusCode == 0) {
  395. delete socketToClient;
  396. socketToClient = NULL;
  397. }
  398. goto end;
  399. }
  400. }
  401. #if USE_MITM
  402. if (baseURL.isValid()) {
  403. requestURL = BBS2chProxyURL(baseURL, url);
  404. log_printf(1, "Running as MITM proxy for %s\n", requestURL.absoluteString().c_str());
  405. } else
  406. #endif
  407. requestURL = BBS2chProxyURL(url);
  408. if (!requestURL.isHttp()) {
  409. sendResponse(400, "Bad Request", socketToClient);
  410. statusCode = 400;
  411. goto end;
  412. }
  413. if (force_5chnet) {
  414. if (requestURL.getHost() != "menu.2ch.net" && requestURL.replaceHost("2ch.net", "5ch.net")) {
  415. force5ch = true;
  416. log_printf(1, "Detected *.2ch.net URL, changed target URL to %s\n", requestURL.absoluteString().c_str());
  417. }
  418. }
  419. /* parse request headers */
  420. while (socketToClient->readLine(buf, 16384)) {
  421. if (!strcmp("\r\n",buf)) break;
  422. requestHeaders.add(buf);
  423. }
  424. if (requestHeaders.hasNameAndValue("Transfer-Encoding", "chunked")) {
  425. isClientChunked = true;
  426. }
  427. else if (requestHeaders.has("Content-Length")) {
  428. content_length = atoi(requestHeaders.get("Content-Length").c_str());
  429. }
  430. if (requestHeaders.has("Expect")) {
  431. if (!strcasecmp(requestHeaders.get("Expect").c_str(), "100-continue") && !isClientHttp1_0) {
  432. log_printf(1, "Received Expect: 100-continue header, sending 100 Continue response to the client\n");
  433. socketToClient->writeString("HTTP/1.1 100 Continue\r\n\r\n");
  434. }
  435. }
  436. if (regexec(&regex, requestURL.absoluteString().c_str(), 6, match, 0) != REG_NOMATCH) {
  437. if ((appKey && (api_mode & 1)) || !requestURL.hostStartsWith("headline.")) datProxyMode = 1;
  438. }
  439. else if (regexec(&regex_kako, requestURL.absoluteString().c_str(), 7, match, 0) != REG_NOMATCH) {
  440. datProxyMode = 3;
  441. }
  442. else if (regexec(&regex_offlaw, requestURL.absoluteString().c_str(), 5, match, 0) != REG_NOMATCH) {
  443. const char *tmp = requestURL.absoluteString().c_str();
  444. const char *thread = strstr(tmp, "key=");
  445. if (thread) {
  446. match[6].rm_so = thread+4-tmp;
  447. match[6].rm_eo = thread+4-tmp;
  448. const char *ptr = thread+4;
  449. while (*ptr != '&' && *ptr != 0) {
  450. ptr++;
  451. match[6].rm_eo++;
  452. }
  453. if (match[6].rm_so != match[6].rm_eo) datProxyMode = 3;
  454. }
  455. }
  456. else if (api_override && appKey) {
  457. if (regexec(&regex_api, requestURL.absoluteString().c_str(), 5, match, 0) != REG_NOMATCH) {
  458. datProxyMode = 2;
  459. match[1] = match[2];
  460. match[5] = match[4];
  461. match[4] = match[3];
  462. }
  463. else if (regexec(&regex_api_auth, requestURL.absoluteString().c_str(), 2, match, 0) != REG_NOMATCH) {
  464. /* return dummy response immediately */
  465. log_printf(1, "Returning dummy response because API overriding is enabled\n");
  466. statusCode = 200;
  467. sendBasicHeaders(200, "OK", socketToClient);
  468. if (0 >= socketToClient->writeString("Content-Type: text/plain\r\n")) goto end;
  469. if (0 >= socketToClient->writeString("Content-Length: 203\r\n\r\n")) goto end;
  470. if (0 >= socketToClient->writeString("SESSION-ID=Monazilla/1.00:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000")) goto end;
  471. goto end;
  472. }
  473. }
  474. if (datProxyMode) {
  475. // match[1] : hostPrefix
  476. // match[2] : host
  477. // match[4] : bbs
  478. // match[5 or 6] : key
  479. const char *original = requestURL.absoluteString().c_str();
  480. const regmatch_t *threadMatch = datProxyMode != 3 ? match+5 : match+6;
  481. std::string board(original + match[4].rm_so, match[4].rm_eo - match[4].rm_so);
  482. std::string thread(original + threadMatch->rm_so, threadMatch->rm_eo - threadMatch->rm_so);
  483. if (!appKey || (datProxyMode == 1 && !(api_mode & 1)) || datProxyMode == 3) {
  484. log_printf(1, "Retrieving thread via read.cgi...\n");
  485. std::string host(original + match[1].rm_so, match[2].rm_eo - match[1].rm_so);
  486. threadKey = host;
  487. threadKey += '/';
  488. threadKey += board;
  489. threadKey += '/';
  490. threadKey += thread;
  491. std::string targetURL = (force_5chnet_https || isHttps) ? "https://" : "http://";
  492. targetURL += host;
  493. targetURL += "/test/read.cgi/";
  494. targetURL += board;
  495. targetURL += '/';
  496. targetURL += thread;
  497. targetURL += '/';
  498. if (force_5chnet_https) isHttps = true;
  499. statusCode = datProxy(targetURL.c_str(), method, requestHeaders);
  500. }
  501. else {
  502. log_printf(1, "Retrieving thread via API...\n");
  503. std::string hostPrefix(original + match[1].rm_so, match[1].rm_eo - match[1].rm_so);
  504. std::string targetURL = "https://";
  505. targetURL += api_server;
  506. targetURL += "/v1/";
  507. targetURL += hostPrefix;
  508. targetURL += '/';
  509. targetURL += board;
  510. targetURL += '/';
  511. targetURL += thread;
  512. isHttps = true;
  513. statusCode = datProxyAPI(targetURL.c_str(), method, requestHeaders);
  514. }
  515. }
  516. else {
  517. if (force_5chnet_https && !isHttps && requestURL.isFamilyOf5chNet()) {
  518. requestURL.setScheme("https");
  519. isHttps = true;
  520. log_printf(1, "The host %s is 5ch.net family, connecting with HTTPS\n", requestURL.getHost().c_str());
  521. }
  522. if (bbsmenu_url && requestURL.equals(BBS2chProxyURL(bbsmenu_url), true)) {
  523. log_printf(1, "Running as a BBS menu proxy...\n");
  524. statusCode = bbsmenuProxy(requestURL.absoluteString().c_str(), method, requestHeaders);
  525. }
  526. else {
  527. bool isPostRequest = !strcasecmp(method, "POST");
  528. bool isPutRequest = !strcasecmp(method, "PUT");
  529. if (isPostRequest && requestURL.isFamilyOf5chNet() && requestURL.pathStartsWith("/test/bbs.cgi")) bbscgi = true;
  530. if (bbscgi) log_printf(1, "Looks like a request to bbs.cgi, will be modified before sending...\n");
  531. else log_printf(1, "Not a notable request, will be forwarded to server...\n");
  532. if (force_5chnet) {
  533. if (requestHeaders.has("Host")) {
  534. std::string host = requestHeaders.get("Host");
  535. size_t pos = host.find("2ch.net");
  536. if (pos != std::string::npos && pos+7 == host.length()) {
  537. if (pos == 0 || host[pos-1] == '.') {
  538. host.replace(pos, 1, "5");
  539. requestHeaders.set("Host", host);
  540. }
  541. }
  542. }
  543. if (bbscgi && requestHeaders.has("Referer")) {
  544. std::string referrer = requestHeaders.get("Referer");
  545. size_t pos = referrer.find("2ch.net");
  546. if (pos != std::string::npos) {
  547. if (pos == 0 || referrer[pos-1] == '.') {
  548. referrer.replace(pos, 1, "5");
  549. requestHeaders.set("Referer", referrer);
  550. }
  551. }
  552. }
  553. }
  554. requestHeaders.remove("Connection");
  555. if (user_agent) requestHeaders.remove("User-Agent");
  556. if (bbscgi && (content_length > 0 || isClientChunked)) {
  557. bool isNotFormURLEncoded = false;
  558. if (requestHeaders.has("Content-Type") && requestHeaders.get("Content-Type").find("application/x-www-form-urlencoded") == std::string::npos) {
  559. isNotFormURLEncoded = true;
  560. }
  561. if (!isNotFormURLEncoded) {
  562. requestHeaders.remove("Content-Length");
  563. if (!bbscgi_headers.empty()) {
  564. for (std::map<std::string, std::string>::iterator it = bbscgi_headers.begin(); it != bbscgi_headers.end(); it++) {
  565. if (requestHeaders.has(it->first)) {
  566. log_printf(1, "Ignoring header \"%s\" because custom header will be appended\n", it->first.c_str());
  567. requestHeaders.remove(it->first);
  568. }
  569. }
  570. }
  571. char *postdata = NULL;
  572. if (isClientChunked) {
  573. content_length = readChunkedBodyIntoBuffer(&postdata, socketToClient);
  574. requestHeaders.remove("Transfer-Encoding");
  575. }
  576. else {
  577. postdata = (char *)calloc(content_length+1, 1);
  578. content_length = socketToClient->read(postdata, content_length);
  579. }
  580. if (gikofix) {
  581. char *ptr = postdata+content_length-1;
  582. while (ptr >= postdata && (*ptr == '\r' || *ptr == '\n')) {
  583. *ptr-- = 0;
  584. }
  585. }
  586. curl_slist *headersForCurl = NULL;
  587. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl);
  588. statusCode = bbsCgiProxy(requestURL.absoluteString().c_str(), requestHeaders, postdata);
  589. free(postdata);
  590. curl_slist_free_all(headersForCurl);
  591. goto end;
  592. }
  593. }
  594. curl_slist *headersForCurl = NULL;
  595. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl);
  596. if (!requestHeaders.has("Expect")) {
  597. headersForCurl = curl_slist_append(headersForCurl, "Expext:");
  598. }
  599. if(curl) {
  600. CURLcode res;
  601. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  602. curl_easy_setopt(curl, CURLOPT_URL, requestURL.absoluteString().c_str());
  603. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  604. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  605. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_proxy);
  606. curl_easy_setopt(curl, CURLOPT_HEADERDATA, this);
  607. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_proxy);
  608. curl_easy_setopt(curl, CURLOPT_WRITEDATA, this);
  609. if(content_length) {
  610. /* set Content-Length explicitly via API to work properly with curl >= 7.66.0 */
  611. if(isPostRequest)
  612. curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, content_length);
  613. else if(isPutRequest)
  614. curl_easy_setopt(curl, CURLOPT_INFILESIZE, content_length);
  615. }
  616. curl_easy_setopt(curl, CURLOPT_READFUNCTION, read_callback_proxy);
  617. curl_easy_setopt(curl, CURLOPT_READDATA, this);
  618. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  619. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  620. //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
  621. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  622. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  623. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headersForCurl);
  624. if(user_agent) {
  625. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  626. }
  627. if(isPostRequest) {
  628. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  629. }
  630. else if(isPutRequest) {
  631. curl_easy_setopt(curl, CURLOPT_UPLOAD, 1L);
  632. }
  633. else if(!strcasecmp(method, "HEAD")) {
  634. curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
  635. }
  636. else if(!strcasecmp(method, "OPTIONS")) {
  637. curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "OPTIONS");
  638. }
  639. if(proxy_server) {
  640. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  641. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  642. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  643. }
  644. res = curl_easy_perform(curl);
  645. if(res != CURLE_OK) {
  646. log_printf(0, "curl error: %s (%s)\n", curl_easy_strerror(res), requestURL.absoluteString().c_str());
  647. if(!status) sendResponse(503, "Service Unavailable", socketToClient);
  648. statusCode = 503;
  649. }
  650. else {
  651. if(isResponseChunked) {
  652. socketToClient->writeString("0\r\n\r\n");
  653. }
  654. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  655. }
  656. curl_easy_reset(curl);
  657. }
  658. curl_slist_free_all(headersForCurl);
  659. }
  660. }
  661. end:
  662. if(statusCode) log_printf(1, "Returned status code %d to client\n",statusCode);
  663. if(buf) free(buf);
  664. if(socketToClient) socketToClient->close();
  665. }
  666. int BBS2chProxyConnection::datProxy(const char *url, const char *method, BBS2chProxyHttpHeaders &requestHeaders)
  667. {
  668. DataStorage *html = NULL;
  669. long statusCode = 0;
  670. long rangeStart = 0, rangeEnd = 0;
  671. time_t lastModified = 0;
  672. time_t ifModifiedSince = 0;
  673. char *buf = (char *)malloc(16384);
  674. if(!buf) goto last;
  675. if(requestHeaders.has("Range")) {
  676. std::string value = requestHeaders.get("Range");
  677. if(value.find("bytes=") == 0 && value.find(",") == std::string::npos) {
  678. char *ptr = (char *)value.c_str() + 6;
  679. if(*ptr == '-') {
  680. rangeStart = atoi(ptr);
  681. }
  682. else {
  683. rangeStart = strtol(ptr, &ptr, 10);
  684. if(*ptr == '-') ptr++;
  685. if(*ptr && *ptr != '\r') {
  686. rangeEnd = strtol(ptr, NULL, 10);
  687. if(rangeEnd && rangeStart > rangeEnd) {
  688. sendResponse(416, "Requested range not satisfiable", socketToClient);
  689. statusCode = 416;
  690. goto last;
  691. }
  692. }
  693. }
  694. //fprintf(stderr, "range=%ld-%ld\n",rangeStart,rangeEnd);
  695. }
  696. else {
  697. sendResponse(416, "Requested range not satisfiable", socketToClient);
  698. statusCode = 416;
  699. goto last;
  700. }
  701. }
  702. if(requestHeaders.has("If-Modified-Since")) {
  703. struct tm time_ = {};
  704. strptime(requestHeaders.get("If-Modified-Since").c_str(), httpTimestampFmt, &time_);
  705. ifModifiedSince = mktime(&time_);
  706. }
  707. if(rangeStart > 0) {
  708. PBBS2chProxyThreadInfo info;
  709. pthread_mutex_lock(mutex);
  710. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  711. if(it != threadCache->end()) {
  712. info = it->second;
  713. }
  714. pthread_mutex_unlock(mutex);
  715. log_printf(5,"range request from %ld bytes\n",rangeStart);
  716. if(info) {
  717. int from = info->lastResNum;
  718. int alreadyRead = info->cachedSize;
  719. int lastResLength = info->cachedData->length;
  720. log_printf(5,"hit %s: cached %d bytes, last res size %d\n",threadKey.c_str(),alreadyRead,lastResLength);
  721. if(rangeStart <= alreadyRead && rangeStart >= alreadyRead - lastResLength) {
  722. if(curl) {
  723. CURLcode res;
  724. DataStorage *dat = new DataStorage();
  725. log_printf(5,"partial access from res num %d\n",from);
  726. snprintf(buf,16384,"%s%d-n",url,from);
  727. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  728. curl_easy_setopt(curl, CURLOPT_URL, buf);
  729. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  730. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  731. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  732. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  733. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  734. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  735. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  736. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  737. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  738. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  739. if(proxy_server) {
  740. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  741. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  742. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  743. }
  744. if(user_agent) {
  745. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  746. }
  747. else if(requestHeaders.has("User-Agent")) {
  748. curl_easy_setopt(curl, CURLOPT_USERAGENT, requestHeaders.get("User-Agent").c_str());
  749. }
  750. res = curl_easy_perform(curl);
  751. if(res == CURLE_OK) {
  752. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  753. curl_easy_reset(curl);
  754. if(statusCode == 200 && dat->length) {
  755. DataStorage *updated = html2dat(dat, from, &lastModified, true);
  756. if(ifModifiedSince && lastModified && updated && updated->length == lastResLength) {
  757. struct tm time_ = {};
  758. gmtime_r(&lastModified,&time_);
  759. time_t tmp = mktime(&time_);
  760. if(ifModifiedSince >= tmp) {
  761. sendResponse(304, "Not Modified", socketToClient);
  762. log_printf(5,"not modified!\n");
  763. delete updated;
  764. delete dat;
  765. statusCode = 304;
  766. goto last;
  767. }
  768. }
  769. if(updated && updated->length && updated->length >= lastResLength) {
  770. html = new DataStorage(alreadyRead - lastResLength);
  771. html->appendBytes(updated->bytes, updated->length);
  772. if(!rangeEnd) rangeEnd = html->length - 1;
  773. if(rangeStart > rangeEnd) {
  774. sendResponse(416, "Requested range not satisfiable", socketToClient);
  775. delete updated;
  776. delete dat;
  777. statusCode = 416;
  778. goto last;
  779. }
  780. statusCode = 206;
  781. log_printf(5,"cache hit; reconstructed data length:%ld\n",(long)html->length);
  782. }
  783. else {
  784. log_printf(5,"cache misshit?\n");
  785. sendResponse(416, "Requested range not satisfiable", socketToClient);
  786. delete updated;
  787. delete dat;
  788. statusCode = 416;
  789. goto last;
  790. }
  791. delete updated;
  792. }
  793. }
  794. else {
  795. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),buf);
  796. curl_easy_reset(curl);
  797. }
  798. delete dat;
  799. if(html) goto resp;
  800. }
  801. }
  802. else {
  803. log_printf(5,"invalid cache contents\n");
  804. pthread_mutex_lock(mutex);
  805. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  806. if(it != threadCache->end()) {
  807. threadCache->erase(it);
  808. }
  809. pthread_mutex_unlock(mutex);
  810. }
  811. }
  812. }
  813. {
  814. if(curl) {
  815. CURLcode res;
  816. DataStorage *dat = new DataStorage();
  817. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  818. #if 1
  819. /* This is an ad-hoc fix against malformed read.cgi behaviors (e.g. krsw server) */
  820. snprintf(buf, 16384, "%s1-", url);
  821. curl_easy_setopt(curl, CURLOPT_URL, buf);
  822. #else
  823. curl_easy_setopt(curl, CURLOPT_URL, url);
  824. #endif
  825. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  826. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  827. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  828. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  829. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  830. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  831. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  832. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  833. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  834. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  835. if(proxy_server) {
  836. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  837. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  838. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  839. }
  840. if(user_agent) {
  841. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  842. }
  843. else if(requestHeaders.has("User-Agent")) {
  844. curl_easy_setopt(curl, CURLOPT_USERAGENT, requestHeaders.get("User-Agent").c_str());
  845. }
  846. res = curl_easy_perform(curl);
  847. if(res != CURLE_OK) {
  848. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),url);
  849. sendResponse(503, "Service Unavailable", socketToClient);
  850. curl_easy_reset(curl);
  851. delete dat;
  852. statusCode = 503;
  853. goto last;
  854. }
  855. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  856. curl_easy_reset(curl);
  857. if(statusCode == 200) {
  858. html = html2dat(dat, 1, &lastModified, false);
  859. }
  860. delete dat;
  861. }
  862. if(!html || !html->length) {
  863. sendResponse(503, "Service Unavailable", socketToClient);
  864. statusCode = 503;
  865. goto last;
  866. }
  867. if((rangeStart || rangeEnd) && html && html->length) {
  868. if(!rangeEnd) rangeEnd = html->length - 1;
  869. if(rangeStart < 0) rangeStart = html->length + rangeStart;
  870. if(rangeStart < html->length && rangeEnd < html->length && rangeStart <= rangeEnd) {
  871. statusCode = 206;
  872. }
  873. else {
  874. if(ifModifiedSince && lastModified && rangeStart == html->length) {
  875. struct tm time_ = {};
  876. gmtime_r(&lastModified,&time_);
  877. time_t tmp = mktime(&time_);
  878. if(ifModifiedSince >= tmp) {
  879. sendResponse(304, "Not Modified", socketToClient);
  880. log_printf(5,"not modified!\n");
  881. statusCode = 304;
  882. goto last;
  883. }
  884. }
  885. sendResponse(416, "Requested range not satisfiable", socketToClient);
  886. statusCode = 416;
  887. goto last;
  888. }
  889. }
  890. }
  891. resp:
  892. if(statusCode == 206) sendBasicHeaders(statusCode,"Partial Content",socketToClient);
  893. else sendBasicHeaders(statusCode,"OK",socketToClient);
  894. if(0 >= socketToClient->writeString("Content-Type: text/plain\r\n")) goto last;
  895. if(0 >= socketToClient->writeString("Accept-Ranges: bytes\r\n")) goto last;
  896. if(statusCode == 206) {
  897. std::ostringstream ss;
  898. ss << "Content-Range: bytes " << rangeStart << "-" << rangeEnd << "/" << html->length << "\r\n";
  899. if (0 >= socketToClient->writeString(ss.str())) goto last;
  900. //fprintf(stderr,"Content-Length: %ld\r\n",rangeEnd - rangeStart + 1);
  901. //fprintf(stderr,"Content-Range: bytes %ld-%ld/%ld\r\n",rangeStart,rangeEnd,(long)html->length);
  902. DataStorage *newHtml = new DataStorage();
  903. newHtml->appendBytes(html->bytes+rangeStart, rangeEnd - rangeStart + 1);
  904. delete html;
  905. html = newHtml;
  906. }
  907. {
  908. std::ostringstream ss;
  909. ss << "Content-Length: " << html->length << "\r\n";
  910. if(0 >= socketToClient->writeString(ss.str())) goto last;
  911. }
  912. if(lastModified) {
  913. struct tm time_ = {};
  914. char date[256];
  915. gmtime_r(&lastModified,&time_);
  916. strftime(date,256,httpTimestampFmt,&time_);
  917. std::string header = "Last-Modified: ";
  918. header += date;
  919. header += "\r\n";
  920. if(0 >= socketToClient->writeString(header)) goto last;
  921. //fprintf(stderr,"Last-Modified: %s\r\n",date);
  922. }
  923. if(0 > socketToClient->writeString("\r\n")) goto last;
  924. if(html && statusCode >= 200 && statusCode < 300 && strcasecmp(method, "HEAD")) {
  925. if(html->length > socketToClient->write(html->bytes, html->length)) goto last;
  926. }
  927. last:
  928. if(buf) free(buf);
  929. if(html) delete html;
  930. return statusCode;
  931. }
  932. DataStorage *BBS2chProxyConnection::html2dat_old(DataStorage *html, int startResNum, time_t *lastModified, bool useCache)
  933. {
  934. char *ptr = html->bytes;
  935. char *end = html->bytes + html->length - 1;
  936. DataStorage *txt = new DataStorage();
  937. int res = startResNum, i=0;
  938. char signature[32];
  939. char title[1024];
  940. int cachedSize = 0;
  941. bool bbspink = strstr(threadKey.c_str(),"bbspink.com") ? true : false;
  942. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<title>", 7);
  943. if(!ptr) {
  944. delete txt;
  945. return NULL;
  946. }
  947. ptr += 7;
  948. while(1) {
  949. if(*ptr == '<') {
  950. if(!strncasecmp(ptr,"</title>",8)) {
  951. ptr += 8;
  952. break;
  953. }
  954. else title[i++] = *ptr++;
  955. }
  956. else title[i++] = *ptr++;
  957. }
  958. title[i] = 0;
  959. snprintf(signature,32,"<dt>%d ",res);
  960. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  961. if(!ptr) {
  962. delete txt;
  963. return NULL;
  964. }
  965. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  966. if(!buffer) {
  967. delete txt;
  968. return NULL;
  969. }
  970. unsigned char *body = buffer;
  971. char *mail = (char *)body + 65536;
  972. char *name = mail + 1024;
  973. char *date = name + 1024;
  974. char *encrypted = date + 1024;
  975. while(ptr < end) {
  976. //fprintf(stderr,"%s\n",signature);
  977. DataStorage *resData = new DataStorage();
  978. i=0;
  979. mail[0] = 0;
  980. ptr = strstr(ptr,signature);
  981. ptr += strlen(signature);
  982. while(*ptr != '<') ptr++;
  983. ptr++;
  984. const char *endStr;
  985. if(*ptr == 'a' || *ptr == 'A') {
  986. replay:
  987. // has mail
  988. while(*ptr != '"') ptr++;
  989. ptr++;
  990. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  991. ptr += 28;
  992. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  993. encrypted[i] = 0;
  994. i = decryptMail((unsigned char *)mail,encrypted);
  995. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  996. ptr -= reconstruct_len;
  997. char *start = ptr;
  998. memcpy(ptr, "<a href=\"mailto:", 16);
  999. ptr += 16;
  1000. memcpy(ptr, mail, i);
  1001. ptr = start;
  1002. i=0;
  1003. goto replay;
  1004. }
  1005. else {
  1006. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  1007. while(*ptr != '"') mail[i++] = *ptr++;
  1008. mail[i] = 0;
  1009. }
  1010. endStr = "</a>";
  1011. }
  1012. else if(*ptr == 'b') {
  1013. endStr = NULL;
  1014. }
  1015. else {
  1016. endStr = "</font>";
  1017. }
  1018. if(endStr) {
  1019. ptr = strstr(ptr,"<b>");
  1020. ptr += 3;
  1021. }
  1022. else {
  1023. ptr = strchr(ptr,'>');
  1024. ptr++;
  1025. }
  1026. i=0;
  1027. while(1) {
  1028. if(*ptr == '<') {
  1029. if(!strncasecmp(ptr,"</b>",4) && (!endStr || !strncasecmp(ptr+4,endStr,strlen(endStr)))) {
  1030. ptr += 4;
  1031. if(endStr) ptr += strlen(endStr);
  1032. break;
  1033. }
  1034. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  1035. int j=0;
  1036. ptr = strstr(ptr,"data-cfemail=\"");
  1037. ptr += 14;
  1038. while(*ptr != '"') encrypted[j++] = *ptr++;
  1039. encrypted[j] = 0;
  1040. j = decryptMail((unsigned char *)name+i,encrypted);
  1041. i += j;
  1042. ptr = strstr(ptr,"</script>");
  1043. ptr += 9;
  1044. }
  1045. else name[i++] = *ptr++;
  1046. }
  1047. else name[i++] = *ptr++;
  1048. }
  1049. resData->appendBytes(name, i);
  1050. resData->appendBytes("<>", 2);
  1051. if(mail[0]) resData->appendBytes(mail ,strlen(mail));
  1052. resData->appendBytes("<>", 2);
  1053. ptr += 2;
  1054. i=0;
  1055. while(1) {
  1056. if(*ptr == '<') {
  1057. if(!strncasecmp(ptr,"<dd>",4)) {
  1058. ptr += 4;
  1059. break;
  1060. }
  1061. else if(!strncmp(ptr,"<a href=\"javascript:be(",23)) {
  1062. memcpy(date+i,"BE:",3);
  1063. ptr += 23;
  1064. i += 3;
  1065. while(*ptr != ')') date[i++] = *ptr++;
  1066. date[i++] = '-';
  1067. ptr = strchr(ptr,'?');
  1068. ptr++;
  1069. char *tmp = strstr(ptr,"</a>");
  1070. memcpy(date+i,ptr,tmp-ptr);
  1071. i += tmp-ptr;
  1072. ptr = tmp + 4;
  1073. }
  1074. else date[i++] = *ptr++;
  1075. }
  1076. else date[i++] = *ptr++;
  1077. }
  1078. resData->appendBytes(date ,i);
  1079. resData->appendBytes("<>", 2);
  1080. i=0;
  1081. while(1) {
  1082. if(*ptr == '<') {
  1083. if(!strncasecmp(ptr,"<br><br>\n",9)) {
  1084. ptr += 9;
  1085. break;
  1086. }
  1087. else if(!strncasecmp(ptr,"<dt>",4) || !strncasecmp(ptr,"</dl>",5)) {
  1088. while(i>0 &&body[i-1] == '\n') i--;
  1089. break;
  1090. }
  1091. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  1092. int j=0;
  1093. ptr = strstr(ptr,"data-cfemail=\"");
  1094. ptr += 14;
  1095. while(*ptr != '"') encrypted[j++] = *ptr++;
  1096. encrypted[j] = 0;
  1097. j = decryptMail(body+i,encrypted);
  1098. i += j;
  1099. ptr = strstr(ptr,"</script>");
  1100. ptr += 9;
  1101. }
  1102. else if(!strncmp(ptr,"<a href=\"http",13)) {
  1103. ptr = strchr(ptr,'>');
  1104. ptr++;
  1105. char *link = ptr;
  1106. ptr = strstr(link,"</a>");
  1107. memcpy(body+i,link,ptr-link);
  1108. i += ptr-link;
  1109. ptr += 4;
  1110. }
  1111. else if(!strncmp(ptr,"<img src=\"",10)) {
  1112. ptr += 10;
  1113. char *img = ptr;
  1114. ptr = strstr(img,"\">");
  1115. memcpy(body+i,img,ptr-img);
  1116. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  1117. int length = ptr-img;
  1118. while(*img != '/') {
  1119. img++;
  1120. length--;
  1121. }
  1122. memcpy(body+i,"sssp:",5);
  1123. memcpy(body+i+5,img,length);
  1124. i += length + 5;
  1125. }
  1126. else i += ptr-img;
  1127. ptr += 2;
  1128. }
  1129. else if(!bbspink && !strncmp(ptr,"<br>",4)) {
  1130. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  1131. memcpy(body+i," <br>",5);
  1132. i += 5;
  1133. }
  1134. else {
  1135. memcpy(body+i,"<br>",4);
  1136. i += 4;
  1137. }
  1138. ptr += 4;
  1139. }
  1140. else body[i++] = *ptr++;
  1141. }
  1142. else if(!bbspink && *ptr == ' ') {
  1143. if(*(ptr+1) == ' ') ptr++;
  1144. else body[i++] = *ptr++;
  1145. }
  1146. else body[i++] = *ptr++;
  1147. }
  1148. resData->appendBytes(body ,i);
  1149. resData->appendBytes("<>", 2);
  1150. if(res == 1) resData->appendBytes(title ,strlen(title));
  1151. resData->appendBytes("\n" ,1);
  1152. if(useCache && res == startResNum) {
  1153. PBBS2chProxyThreadInfo info;
  1154. bool hit = false;
  1155. pthread_mutex_lock(mutex);
  1156. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  1157. if(it != threadCache->end()) {
  1158. info = it->second;
  1159. threadCache->erase(it);
  1160. }
  1161. pthread_mutex_unlock(mutex);
  1162. if(info) {
  1163. log_printf(5,"cache hit");
  1164. if(info->cachedData->length == resData->length) {
  1165. log_printf(5,"... size match");
  1166. if(!memcmp(info->cachedData->bytes,resData->bytes,resData->length)) {
  1167. log_printf(5,"... content match");
  1168. hit = true;
  1169. cachedSize = info->cachedSize - resData->length;
  1170. }
  1171. }
  1172. log_printf(5,"\n");
  1173. }
  1174. if(!hit) {
  1175. delete resData;
  1176. free(buffer);
  1177. return NULL;
  1178. }
  1179. }
  1180. txt->appendBytes(resData->bytes, resData->length);
  1181. res++;
  1182. while(*ptr == '\n' || *ptr == '\r') ptr++;
  1183. snprintf(signature,32,"<dt>%d ",res);
  1184. if(!memmem_priv(ptr, end-ptr+1, signature, strlen(signature))) {
  1185. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1186. info->lastResNum = res-1;
  1187. info->cachedSize = txt->length+cachedSize;
  1188. info->cachedData = resData;
  1189. pthread_mutex_lock(mutex);
  1190. threadCache->insert(std::make_pair(threadKey,info));
  1191. pthread_mutex_unlock(mutex);
  1192. log_printf(5,"cached thread %s (%ld bytes)\n",threadKey.c_str(),(long)resData->length);
  1193. if(lastModified) {
  1194. *lastModified = 0;
  1195. char formattedDate[256];
  1196. char *ptr;
  1197. ptr = date;
  1198. int year = strtol(ptr,&ptr,10);
  1199. if(*ptr != '/') break;
  1200. ptr++;
  1201. int month = strtol(ptr,&ptr,10);
  1202. if(*ptr != '/') break;
  1203. ptr++;
  1204. int day = strtol(ptr,&ptr,10);
  1205. if(!*ptr) break;
  1206. while(*ptr != ' ' && *ptr != 0) ptr++;
  1207. if(!*ptr) break;
  1208. ptr++;
  1209. int hour = strtol(ptr,&ptr,10);
  1210. if(*ptr != ':') break;
  1211. ptr++;
  1212. int minutes = strtol(ptr,&ptr,10);
  1213. if(*ptr != ':') break;
  1214. ptr++;
  1215. int seconds = strtol(ptr,&ptr,10);
  1216. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  1217. if(year < 100) year += 2000;
  1218. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  1219. //fprintf(stderr,"%s\n",formattedDate);
  1220. struct tm time = {};
  1221. strptime(formattedDate,threadTimestampFmt,&time);
  1222. *lastModified = mktime(&time);
  1223. //gmtime_r(lastModified,&time);
  1224. //strftime(formattedDate,256,httpTimestampFmt,&time);
  1225. //fprintf(stderr,"%s\n",formattedDate);
  1226. }
  1227. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  1228. break;
  1229. }
  1230. delete resData;
  1231. }
  1232. free(buffer);
  1233. return txt;
  1234. }
  1235. DataStorage *BBS2chProxyConnection::html2dat(DataStorage *html, int startResNum, time_t *lastModified, bool useCache)
  1236. {
  1237. char *ptr = html->bytes;
  1238. char *end = html->bytes + html->length - 1;
  1239. DataStorage *txt = new DataStorage();
  1240. int res = startResNum, i=0;
  1241. char signature[64];
  1242. char title[1024];
  1243. int cachedSize = 0;
  1244. char signatureTag[32];
  1245. char closeTag[32];
  1246. int closeTagLen;
  1247. bool isNewHTML = false;
  1248. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<div id=\"threadtitle\">", 22);
  1249. if (ptr) {
  1250. isNewHTML = true;
  1251. char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, "<article id=\"", 13);
  1252. if (!ptr2) {
  1253. delete txt;
  1254. return NULL;
  1255. }
  1256. ptr += 22;
  1257. while (1) {
  1258. if (*ptr == '<') {
  1259. if (!strncasecmp(ptr, "</div>", 6)) {
  1260. ptr += 6;
  1261. break;
  1262. }
  1263. else title[i++] = *ptr++;
  1264. }
  1265. else if(*ptr == '\n') break;
  1266. else title[i++] = *ptr++;
  1267. }
  1268. title[i] = 0;
  1269. snprintf(signature, 32, "<article id=\"%d\"", res);
  1270. }
  1271. else {
  1272. ptr = html->bytes;
  1273. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<h1 class=\"title\">", 18);
  1274. if(!ptr) {
  1275. delete txt;
  1276. return html2dat_old(html, startResNum, lastModified, useCache);
  1277. }
  1278. else {
  1279. char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, " class=\"post\"", 13);
  1280. if(ptr2) {
  1281. char *tmp = ptr2;
  1282. *ptr2 = 0;
  1283. while(*ptr2 != '<') ptr2--;
  1284. strcpy(signatureTag, ptr2);
  1285. *tmp = ' ';
  1286. }
  1287. else {
  1288. delete txt;
  1289. return NULL;
  1290. }
  1291. /*char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, "<dl class=\"post\"", 16);
  1292. if(ptr2) {
  1293. delete txt;
  1294. return html2dat_pink(html, startResNum, lastModified, useCache);
  1295. }*/
  1296. }
  1297. ptr += 18;
  1298. while(1) {
  1299. if(*ptr == '<') {
  1300. if(!strncasecmp(ptr,"</h1>",5)) {
  1301. ptr += 5;
  1302. break;
  1303. }
  1304. else title[i++] = *ptr++;
  1305. }
  1306. else if(*ptr == '\n') break;
  1307. else title[i++] = *ptr++;
  1308. }
  1309. title[i] = 0;
  1310. snprintf(signature,32,"%s class=\"post\" id=\"%d\"",signatureTag,res);
  1311. }
  1312. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  1313. if(!ptr) {
  1314. delete txt;
  1315. return NULL;
  1316. }
  1317. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  1318. if(!buffer) {
  1319. delete txt;
  1320. return NULL;
  1321. }
  1322. unsigned char *body = buffer;
  1323. char *mail = (char *)body + 65536;
  1324. char *name = mail + 1024;
  1325. char *date = name + 1024;
  1326. char *encrypted = date + 1024;
  1327. while(ptr < end) {
  1328. //fprintf(stderr,"%s\n",signature);
  1329. DataStorage *resData = new DataStorage();
  1330. i=0;
  1331. mail[0] = 0;
  1332. if (isNewHTML) ptr = strstr(ptr," class=\"postusername\"><b>");
  1333. else ptr = strstr(ptr," class=\"name\"><b>");
  1334. if(ptr) {
  1335. char *tmp = ptr;
  1336. *ptr = 0;
  1337. while(*ptr != '<') ptr--;
  1338. snprintf(closeTag,32,"</%s>",ptr+1);
  1339. closeTagLen = strlen(closeTag);
  1340. if (isNewHTML) ptr = tmp + 25;
  1341. else ptr = tmp + 17;
  1342. }
  1343. else {
  1344. delete resData;
  1345. break;
  1346. }
  1347. char endStr[64];
  1348. if(!strncmp(ptr,"<a href=\"mailto:",16)) {
  1349. replay:
  1350. // has mail
  1351. while(*ptr != '"') ptr++;
  1352. ptr++;
  1353. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  1354. ptr += 28;
  1355. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  1356. encrypted[i] = 0;
  1357. i = decryptMail((unsigned char *)mail,encrypted);
  1358. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  1359. ptr -= reconstruct_len;
  1360. char *start = ptr;
  1361. memcpy(ptr, "<a href=\"mailto:", 16);
  1362. ptr += 16;
  1363. memcpy(ptr, mail, i);
  1364. ptr = start;
  1365. i=0;
  1366. goto replay;
  1367. }
  1368. else {
  1369. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  1370. while(1) {
  1371. if(*ptr == '<' && !strncmp(ptr,"<a href=\"",9)) {
  1372. ptr = strchr(ptr,'>');
  1373. ptr++;
  1374. char *link = ptr;
  1375. ptr = strstr(link,"</a>");
  1376. memcpy(mail+i,link,ptr-link);
  1377. i += ptr-link;
  1378. ptr += 4;
  1379. }
  1380. else if(*ptr == '"') break;
  1381. else mail[i++] = *ptr++;
  1382. }
  1383. //while(*ptr != '"') mail[i++] = *ptr++;
  1384. mail[i] = 0;
  1385. }
  1386. snprintf(endStr,64,"</a></b>%s",closeTag);
  1387. while(*ptr != '>') ptr++;
  1388. ptr++;
  1389. }
  1390. /* we do not have to handle this special case because read.cgi on bbspink doesn't
  1391. emit font tags anymore and it conflicts with text decorations using "melon point" */
  1392. /*else if(!strncmp(ptr,"<font",5)) {
  1393. snprintf(endStr,64,"</font></b>%s",closeTag);
  1394. while(*ptr != '>') ptr++;
  1395. ptr++;
  1396. }*/
  1397. else {
  1398. snprintf(endStr,64,"</b>%s",closeTag);
  1399. }
  1400. i=0;
  1401. while(1) {
  1402. if(*ptr == '<') {
  1403. if(!strncmp(ptr,endStr,strlen(endStr))) {
  1404. ptr += strlen(endStr);
  1405. break;
  1406. }
  1407. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  1408. int j=0;
  1409. ptr = strstr(ptr,"data-cfemail=\"");
  1410. ptr += 14;
  1411. while(*ptr != '"') encrypted[j++] = *ptr++;
  1412. encrypted[j] = 0;
  1413. j = decryptMail((unsigned char *)name+i,encrypted);
  1414. i += j;
  1415. ptr = strstr(ptr,"</script>");
  1416. ptr += 9;
  1417. }
  1418. else if(!strncmp(ptr,"<a href=\"",9)) {
  1419. ptr = strchr(ptr,'>');
  1420. ptr++;
  1421. char *link = ptr;
  1422. ptr = strstr(link,"</a>");
  1423. memcpy(name+i,link,ptr-link);
  1424. i += ptr-link;
  1425. ptr += 4;
  1426. }
  1427. else name[i++] = *ptr++;
  1428. }
  1429. else name[i++] = *ptr++;
  1430. }
  1431. resData->appendBytes(name, i);
  1432. resData->appendBytes("<>", 2);
  1433. if(mail[0]) resData->appendBytes(mail ,strlen(mail));
  1434. resData->appendBytes("<>", 2);
  1435. ptr = strstr(ptr," class=\"date\">");
  1436. if(ptr) {
  1437. char *tmp = ptr;
  1438. *ptr = 0;
  1439. while(*ptr != '<') ptr--;
  1440. snprintf(closeTag,32,"</%s>",ptr+1);
  1441. closeTagLen = strlen(closeTag);
  1442. ptr = tmp + 14;
  1443. }
  1444. else {
  1445. delete resData;
  1446. break;
  1447. }
  1448. i=0;
  1449. while(1) {
  1450. if(*ptr == '<') {
  1451. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1452. ptr += closeTagLen;
  1453. break;
  1454. }
  1455. else date[i++] = *ptr++;
  1456. }
  1457. else date[i++] = *ptr++;
  1458. }
  1459. if(!strncmp(ptr,"<div class=\"uid",15) || !strncmp(ptr,"<span class=\"uid",16)) {
  1460. char *tmp = ptr+1;
  1461. while(*ptr != ' ') ptr++;
  1462. *ptr = 0;
  1463. snprintf(closeTag,32,"</%s>",tmp);
  1464. closeTagLen = strlen(closeTag);
  1465. ptr += 11;
  1466. while(*ptr != '>') ptr++;
  1467. ptr++;
  1468. date[i++] = ' ';
  1469. while(1) {
  1470. if(*ptr == '<') {
  1471. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1472. ptr += closeTagLen;
  1473. break;
  1474. }
  1475. else date[i++] = *ptr++;
  1476. }
  1477. else date[i++] = *ptr++;
  1478. }
  1479. }
  1480. if(!strncmp(ptr,"<div class=\"be",14) || !strncmp(ptr,"<span class=\"be",15)) {
  1481. ptr += 14;
  1482. while(*ptr != '>') ptr++;
  1483. ptr++;
  1484. if(!strncmp(ptr,"<a href=\"",9)) {
  1485. ptr += 9;
  1486. while(*ptr != '/' && *ptr != '"') ptr++;
  1487. if(*ptr == '/' && (!strncmp(ptr,"//be.2ch.net/user/",18) || !strncmp(ptr,"//be.5ch.net/user/",18))) {
  1488. memcpy(date+i," BE:",4);
  1489. i += 4;
  1490. ptr += 18;
  1491. while(*ptr != '"') date[i++] = *ptr++;
  1492. date[i++] = '-';
  1493. ptr = strchr(ptr,'?');
  1494. ptr++;
  1495. char *tmp = strstr(ptr,"</a>");
  1496. memcpy(date+i,ptr,tmp-ptr);
  1497. i += tmp-ptr;
  1498. ptr = tmp + 4;
  1499. }
  1500. }
  1501. }
  1502. resData->appendBytes(date ,i);
  1503. resData->appendBytes("<>", 2);
  1504. if (isNewHTML) {
  1505. ptr = strstr(ptr,"<section class=\"post-content\">");
  1506. if (!ptr) {
  1507. delete resData;
  1508. break;
  1509. }
  1510. else {
  1511. ptr += 30;
  1512. if (!strncasecmp(ptr, "<span class=\"AA\">", 17)) {
  1513. strcpy(closeTag, "</span></section>");
  1514. closeTagLen = 17;
  1515. ptr += 17;
  1516. }
  1517. else {
  1518. strcpy(closeTag, "</section>");
  1519. closeTagLen = 10;
  1520. }
  1521. }
  1522. }
  1523. else if(!strcmp(signatureTag,"<div")) {
  1524. ptr = strstr(ptr,"<div class=\"message\">");
  1525. if(!ptr) {
  1526. delete resData;
  1527. break;
  1528. }
  1529. else {
  1530. ptr += 21;
  1531. if(!strncasecmp(ptr,"<span class=\"escaped\">",22)) {
  1532. if(!strncasecmp(ptr+22,"<span class=\"AA\">",17)) {
  1533. strcpy(closeTag,"</span></span></div>");
  1534. closeTagLen = 20;
  1535. ptr += 22+17;
  1536. }
  1537. else {
  1538. strcpy(closeTag,"</span></div>");
  1539. closeTagLen = 13;
  1540. ptr += 22;
  1541. }
  1542. }
  1543. else {
  1544. strcpy(closeTag,"</div>");
  1545. closeTagLen = 6;
  1546. }
  1547. }
  1548. }
  1549. else {
  1550. ptr = strstr(ptr,"<dd class=\"thread_in\">");
  1551. if(!ptr) {
  1552. delete resData;
  1553. break;
  1554. }
  1555. strcpy(closeTag,"</dd>");
  1556. closeTagLen = 5;
  1557. ptr += 22;
  1558. }
  1559. i=0;
  1560. while(1) {
  1561. if(*ptr == '<') {
  1562. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1563. ptr += closeTagLen;
  1564. break;
  1565. }
  1566. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  1567. int j=0;
  1568. ptr = strstr(ptr,"data-cfemail=\"");
  1569. ptr += 14;
  1570. while(*ptr != '"') encrypted[j++] = *ptr++;
  1571. encrypted[j] = 0;
  1572. j = decryptMail(body+i,encrypted);
  1573. i += j;
  1574. ptr = strstr(ptr,"</script>");
  1575. ptr += 9;
  1576. }
  1577. else if(!strncmp(ptr,"<a ",3)) {
  1578. char *tmp = strchr(ptr,'>');
  1579. char *href = (char *)memmem_priv(ptr,tmp-ptr,"href=\"",6);
  1580. char *link = tmp+1;
  1581. if(href && !strncmp(link,"&gt;&gt;",8) && memmem_priv(href,link-href,"test/read.cgi/",14)) {
  1582. while(ptr < link) {
  1583. if(!strncmp(ptr," class=\"",8)) {
  1584. ptr += 8;
  1585. while(*ptr != '"' && *ptr != '>') ptr++;
  1586. if(*ptr == '"') ptr++;
  1587. }
  1588. else body[i++] = *ptr++;
  1589. }
  1590. }
  1591. else {
  1592. ptr = strstr(link,"</a>");
  1593. memcpy(body+i,link,ptr-link);
  1594. i += ptr-link;
  1595. ptr += 4;
  1596. }
  1597. }
  1598. else if(!strncmp(ptr,"<img src=\"",10)) {
  1599. ptr += 10;
  1600. char *img = ptr;
  1601. ptr = strstr(img,"\">");
  1602. memcpy(body+i,img,ptr-img);
  1603. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  1604. int length = ptr-img;
  1605. while(*img != '/') {
  1606. img++;
  1607. length--;
  1608. }
  1609. memcpy(body+i,"sssp:",5);
  1610. memcpy(body+i+5,img,length);
  1611. i += length + 5;
  1612. }
  1613. else i += ptr-img;
  1614. ptr += 2;
  1615. }
  1616. else if(!strncmp(ptr,"<br>",4)) {
  1617. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  1618. memcpy(body+i," <br>",5);
  1619. i += 5;
  1620. }
  1621. else {
  1622. memcpy(body+i,"<br>",4);
  1623. i += 4;
  1624. }
  1625. ptr += 4;
  1626. }
  1627. else body[i++] = *ptr++;
  1628. }
  1629. else body[i++] = *ptr++;
  1630. }
  1631. resData->appendBytes(body ,i);
  1632. resData->appendBytes("<>", 2);
  1633. if(res == 1) resData->appendBytes(title ,strlen(title));
  1634. resData->appendBytes("\n" ,1);
  1635. if(useCache && res == startResNum) {
  1636. PBBS2chProxyThreadInfo info;
  1637. bool hit = false;
  1638. pthread_mutex_lock(mutex);
  1639. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  1640. if(it != threadCache->end()) {
  1641. info = it->second;
  1642. threadCache->erase(it);
  1643. }
  1644. pthread_mutex_unlock(mutex);
  1645. if(info) {
  1646. log_printf(5,"cache hit");
  1647. if(info->cachedData->length == resData->length) {
  1648. log_printf(5,"... size match");
  1649. if(!memcmp(info->cachedData->bytes,resData->bytes,resData->length)) {
  1650. log_printf(5,"... content match");
  1651. hit = true;
  1652. cachedSize = info->cachedSize - resData->length;
  1653. }
  1654. }
  1655. log_printf(5,"\n");
  1656. }
  1657. if(!hit) {
  1658. delete resData;
  1659. free(buffer);
  1660. return NULL;
  1661. }
  1662. }
  1663. txt->appendBytes(resData->bytes, resData->length);
  1664. res++;
  1665. while(*ptr == '\n' || *ptr == '\r') ptr++;
  1666. if (isNewHTML) strcpy(signature, "<article id=\"");
  1667. else snprintf(signature,64,"%s class=\"post\" id=\"",signatureTag);
  1668. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  1669. if(ptr) {
  1670. int next = atoi(ptr+strlen(signature));
  1671. if(next >= res) {
  1672. while(next > res) {
  1673. txt->appendBytes("broken<><>broken<> broken <>\n", 29);
  1674. res++;
  1675. }
  1676. }
  1677. else ptr = NULL;
  1678. }
  1679. if(!ptr) {
  1680. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1681. info->lastResNum = res-1;
  1682. info->cachedSize = txt->length+cachedSize;
  1683. info->cachedData = resData;
  1684. pthread_mutex_lock(mutex);
  1685. threadCache->insert(std::make_pair(threadKey,info));
  1686. pthread_mutex_unlock(mutex);
  1687. log_printf(5,"cached thread %s (%ld bytes)\n",threadKey.c_str(),(long)resData->length);
  1688. if(lastModified) {
  1689. *lastModified = 0;
  1690. char formattedDate[256];
  1691. char *ptr;
  1692. ptr = date;
  1693. int year = strtol(ptr,&ptr,10);
  1694. if(*ptr != '/') break;
  1695. ptr++;
  1696. int month = strtol(ptr,&ptr,10);
  1697. if(*ptr != '/') break;
  1698. ptr++;
  1699. int day = strtol(ptr,&ptr,10);
  1700. if(!*ptr) break;
  1701. while(*ptr != ' ' && *ptr != 0) ptr++;
  1702. if(!*ptr) break;
  1703. ptr++;
  1704. int hour = strtol(ptr,&ptr,10);
  1705. if(*ptr != ':') break;
  1706. ptr++;
  1707. int minutes = strtol(ptr,&ptr,10);
  1708. if(*ptr != ':') break;
  1709. ptr++;
  1710. int seconds = strtol(ptr,&ptr,10);
  1711. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  1712. if(year < 100) year += 2000;
  1713. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  1714. //fprintf(stderr,"%s\n",formattedDate);
  1715. struct tm time = {};
  1716. strptime(formattedDate,threadTimestampFmt,&time);
  1717. *lastModified = mktime(&time);
  1718. //gmtime_r(lastModified,&time);
  1719. //strftime(formattedDate,256,httpTimestampFmt,&time);
  1720. //fprintf(stderr,"%s\n",formattedDate);
  1721. }
  1722. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  1723. break;
  1724. }
  1725. delete resData;
  1726. }
  1727. free(buffer);
  1728. return txt;
  1729. }
  1730. int BBS2chProxyConnection::datProxyAPI(const char *url, const char *method, BBS2chProxyHttpHeaders &requestHeaders)
  1731. {
  1732. long statusCode = 0;
  1733. const std::string &postBody = auth.requestBodyForURL(url, curl);
  1734. bool directMode = false;
  1735. if (postBody.empty()) {
  1736. sendResponse(401, "Unauthorized", socketToClient);
  1737. return 401;
  1738. }
  1739. /* just read and strip off post body */
  1740. if (!strcasecmp(method, "POST")) {
  1741. char *postdata = NULL;
  1742. if (isClientChunked) {
  1743. readChunkedBodyIntoBuffer(&postdata, socketToClient);
  1744. }
  1745. else if (content_length) {
  1746. postdata = (char *)calloc(content_length+1, 1);
  1747. socketToClient->read(postdata, content_length);
  1748. }
  1749. if (postdata && strstr(postdata, "sid=")) directMode = true;
  1750. if (postdata) free(postdata);
  1751. }
  1752. if (curl) {
  1753. CURLcode res;
  1754. struct curl_slist *headersForCurl = NULL;
  1755. DataStorage receivedHeader;
  1756. DataStorage receivedBody;
  1757. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl, "Range");
  1758. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl, "If-Modified-Since");
  1759. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl, "Accept-Encoding");
  1760. if (x_2ch_ua_dat) headersForCurl = curl_slist_append(headersForCurl, x_2ch_ua_dat);
  1761. if (curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  1762. curl_easy_setopt(curl, CURLOPT_URL, url);
  1763. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headersForCurl);
  1764. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  1765. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  1766. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  1767. curl_easy_setopt(curl, CURLOPT_WRITEDATA, &receivedBody);
  1768. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_download);
  1769. curl_easy_setopt(curl, CURLOPT_HEADERDATA, &receivedHeader);
  1770. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  1771. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  1772. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  1773. if (force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  1774. if (proxy_server) {
  1775. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  1776. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  1777. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  1778. }
  1779. if (api_ua_dat) {
  1780. curl_easy_setopt(curl, CURLOPT_USERAGENT, api_ua_dat);
  1781. }
  1782. else {
  1783. if (user_agent && !strncmp(user_agent, "Monazilla/", strlen("Monazilla/")))
  1784. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  1785. else
  1786. curl_easy_setopt(curl, CURLOPT_USERAGENT, "");
  1787. }
  1788. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  1789. #if LIBCURL_VERSION_NUM >= 0x071101
  1790. curl_easy_setopt(curl, CURLOPT_COPYPOSTFIELDS, postBody.c_str());
  1791. #else
  1792. curl_easy_setopt(curl, CURLOPT_POSTFIELDS, postBody.c_str());
  1793. #endif
  1794. //return;
  1795. res = curl_easy_perform(curl);
  1796. if (res == CURLE_OK) {
  1797. curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode);
  1798. receivedHeader.appendBytes("", 1);
  1799. const char *ptr = receivedHeader.bytes;
  1800. /* this is necessary because the raw header may contain chunk trailers after real headers */
  1801. const char *end = strstr(receivedHeader.bytes, "\r\n\r\n");
  1802. int threadStatus = 0;
  1803. if (end && !directMode) {
  1804. BBS2chProxyHttpHeaders headers;
  1805. while (ptr < end) {
  1806. const char *lineEnd = strchr(ptr, '\n');
  1807. if (!lineEnd) break;
  1808. headers.add(ptr, lineEnd-ptr);
  1809. ptr = lineEnd + 1;
  1810. }
  1811. if (headers.has("Thread-Status")) {
  1812. threadStatus = atoi(headers.get("Thread-Status").c_str());
  1813. }
  1814. }
  1815. if (threadStatus == 1 || (directMode && end)) {
  1816. if (end+4-receivedHeader.bytes > socketToClient->write(receivedHeader.bytes, end+4-receivedHeader.bytes)) goto last;
  1817. if (receivedBody.length > socketToClient->write(receivedBody.bytes, receivedBody.length)) goto last;
  1818. goto last;
  1819. }
  1820. else if (threadStatus == 8) {
  1821. sendBasicHeaders(302, "Found", socketToClient);
  1822. if (0 >= socketToClient->writeString("Location: http://www2.2ch.net/live.html\r\n")) goto last;
  1823. if (0 >= socketToClient->writeString("\r\n")) goto last;
  1824. statusCode = 302;
  1825. goto last;
  1826. }
  1827. else {
  1828. if (statusCode < 400) {
  1829. sendResponse(401, "Unauthorized", socketToClient);
  1830. statusCode = 401;
  1831. }
  1832. else {
  1833. sendResponse(503, "Service Unavailable", socketToClient);
  1834. statusCode = 503;
  1835. }
  1836. receivedBody.appendBytes("",1);
  1837. if (!strncasecmp(receivedBody.bytes,"ng (",4)) {
  1838. log_printf(0, "API gateway returned error: %s\n", receivedBody.bytes);
  1839. }
  1840. }
  1841. //fprintf(stderr,"%ld\n",statusCode);
  1842. }
  1843. else {
  1844. log_printf(0, "curl error: %s\n", curl_easy_strerror(res));
  1845. sendResponse(503, "Service Unavailable", socketToClient);
  1846. statusCode = 503;
  1847. }
  1848. last:
  1849. curl_easy_reset(curl);
  1850. curl_slist_free_all(headersForCurl);
  1851. }
  1852. return statusCode;
  1853. }
  1854. int BBS2chProxyConnection::bbsmenuProxy(const char *url, const char *method, BBS2chProxyHttpHeaders &requestHeaders)
  1855. {
  1856. long statusCode = 0;
  1857. DataStorage *dat = new DataStorage();
  1858. DataStorage *outHTML = new DataStorage();
  1859. if(curl) {
  1860. CURLcode res;
  1861. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  1862. curl_easy_setopt(curl, CURLOPT_URL, url);
  1863. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  1864. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  1865. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  1866. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  1867. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  1868. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  1869. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  1870. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  1871. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  1872. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  1873. if(proxy_server) {
  1874. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  1875. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  1876. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  1877. }
  1878. if(user_agent) {
  1879. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  1880. }
  1881. else if(requestHeaders.has("User-Agent")) {
  1882. curl_easy_setopt(curl, CURLOPT_USERAGENT, requestHeaders.get("User-Agent").c_str());
  1883. }
  1884. res = curl_easy_perform(curl);
  1885. if(res == CURLE_OK) {
  1886. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  1887. if(statusCode == 200 && dat->length) {
  1888. dat->appendBytes("",1);
  1889. dat->length--;
  1890. char *ptr = dat->bytes;
  1891. while(*ptr) {
  1892. if(!strncasecmp(ptr,"<a href=",8)) {
  1893. char *start = ptr+8;
  1894. char *end = strchr(start,'>');
  1895. if (end) {
  1896. char *urlEnd = end;
  1897. if (*start == '"') {
  1898. start++;
  1899. char *tmp = strchr(start, '"');
  1900. if (tmp && tmp < end) urlEnd = tmp;
  1901. }
  1902. BBS2chProxyURL url(std::string(start, urlEnd-start).c_str());
  1903. if (url.isKindOfHost("5ch.net")) {
  1904. url.replaceHost("5ch.net", "2ch.net");
  1905. url.setScheme("http");
  1906. const std::string &replacedURL = url.absoluteString();
  1907. outHTML->appendBytes("<A HREF=", 8);
  1908. outHTML->appendBytes(replacedURL.c_str(), replacedURL.length());
  1909. outHTML->appendBytes(">", 1);
  1910. ptr = end+1;
  1911. continue;
  1912. } else if (url.isKindOfHost("bbspink.com")) {
  1913. url.setScheme("http");
  1914. const std::string &replacedURL = url.absoluteString();
  1915. outHTML->appendBytes("<A HREF=", 8);
  1916. outHTML->appendBytes(replacedURL.c_str(), replacedURL.length());
  1917. outHTML->appendBytes(">", 1);
  1918. ptr = end+1;
  1919. continue;
  1920. }
  1921. }
  1922. }
  1923. outHTML->appendBytes(ptr++, 1);
  1924. }
  1925. }
  1926. }
  1927. else {
  1928. log_printf(0,"curl error: %s (%s)\n", curl_easy_strerror(res), url);
  1929. statusCode = 503;
  1930. }
  1931. }
  1932. if(statusCode == 200) {
  1933. std::ostringstream ss;
  1934. ss << "Content-Length: " << outHTML->length << "\r\n";
  1935. sendBasicHeaders(statusCode,"OK",socketToClient);
  1936. if(0 >= socketToClient->writeString("Content-Type: text/html\r\n")) goto last;
  1937. if(0 >= socketToClient->writeString(ss.str())) goto last;
  1938. if(0 >= socketToClient->writeString("\r\n")) goto last;
  1939. if(strcasecmp(method, "HEAD")) {
  1940. if(outHTML->length > socketToClient->write(outHTML->bytes, outHTML->length)) goto last;
  1941. }
  1942. }
  1943. else {
  1944. sendResponse(503, "Service Unavailable", socketToClient);
  1945. statusCode = 503;
  1946. }
  1947. last:
  1948. if(curl) curl_easy_reset(curl);
  1949. if(dat) delete dat;
  1950. if(outHTML) delete outHTML;
  1951. return statusCode;
  1952. }
  /*
   * Sends `requestBody` as an HTTP POST to `url` with libcurl after applying the
   * configured request rewriting, in this order:
   *   1. optional reordering of the form fields (bbscgi_postorder),
   *   2. custom headers with %HOST% / %BOARD% / %THREAD% substitution (bbscgi_headers),
   *   3. an optional Lua hook (willSendRequestToBbsCgi) that may replace body,
   *      headers and per-request options,
   *   4. optional conversion of the body to UTF-8 and request signing,
   *   5. an optional delay reported by keyManager before posting.
   * Response headers and data are handled by the curl callbacks registered with `this`.
   *
   * url            - upstream URL to post to.
   * requestHeaders - client headers; modified here (Host is removed, User-Agent and
   *                  the custom headers are set).
   * requestBody    - urlencoded form body.
   *                  NOTE(review): although declared const it may be overwritten in
   *                  place by the reordering step below.
   *
   * Returns the upstream HTTP status, 503 when the transfer failed, or 0 when
   * there is no curl handle.
   */
  1953. int BBS2chProxyConnection::bbsCgiProxy(const char *url, BBS2chProxyHttpHeaders &requestHeaders, const char *requestBody)
  1954. {
  1955. long statusCode = 0;
  1956. std::string hostStr = requestHeaders.get("Host");
  1957. std::string boardStr;
  1958. std::string threadStr;
  1959. requestHeaders.remove("Host");
  1960. if (user_agent) requestHeaders.set("User-Agent", user_agent);
  /* Split the body into name=value pairs; only done when a Lua script, custom
     headers or a field order is configured. */
  1961. if (requestBody && (lua_script || !bbscgi_headers.empty() || !bbscgi_postorder.empty())) {
  1962. std::map<std::string, std::string> fields;
  1963. const char *ptr = requestBody;
  1964. size_t bodyLength = 0;
  1965. while (1) {
  1966. const char *tmp = ptr;
  1967. while (*tmp != '=' && *tmp != 0) tmp++;
  1968. if (*tmp == 0) {
  1969. bodyLength = tmp - requestBody;
  1970. break;
  1971. }
  1972. std::string key(ptr, tmp-ptr);
  1973. tmp++;
  1974. ptr = tmp;
  1975. while (*tmp != '&' && *tmp != 0) tmp++;
  1976. std::string value(ptr, tmp-ptr);
  1977. fields.insert(std::make_pair(key, value));
  1978. if (*tmp == 0) {
  1979. bodyLength = tmp - requestBody;
  1980. break;
  1981. }
  1982. ptr = tmp + 1;
  1983. }
  /* "bbs" and "key" feed the %BOARD% / %THREAD% placeholders and the Lua call. */
  1984. std::map<std::string, std::string>::iterator it;
  1985. if (it = fields.find("bbs"), it != fields.end()) boardStr = it->second;
  1986. if (it = fields.find("key"), it != fields.end()) threadStr = it->second;
  /* Rebuild the body: fields named in bbscgi_postorder first (in that order),
     then whatever is left in the map. */
  1987. if (!bbscgi_postorder.empty()) {
  1988. std::string newBody;
  1989. for (std::vector<std::string>::iterator it2 = bbscgi_postorder.begin(); it2 != bbscgi_postorder.end(); it2++) {
  1990. const std::string &name = *it2;
  1991. if (it = fields.find(name), it != fields.end()) {
  1992. if (!newBody.empty()) newBody.append("&");
  1993. newBody.append(name);
  1994. newBody.append("=");
  1995. newBody.append(it->second);
  1996. fields.erase(name);
  1997. }
  1998. }
  1999. for (it = fields.begin(); it != fields.end(); it++) {
  2000. if (!newBody.empty()) newBody.append("&");
  2001. newBody.append(it->first);
  2002. newBody.append("=");
  2003. newBody.append(it->second);
  2004. }
  /* The caller's buffer is overwritten in place, so the rebuilt body is only
     used when its length equals the parsed length (it differs when pairs were
     lost while parsing, e.g. a repeated field name). */
  2005. if (bodyLength == newBody.length()) {
  2006. strcpy((char *)requestBody, newBody.c_str());
  2007. log_printf(1, "Reordered request body is: %s\n", requestBody);
  2008. }
  2009. else {
  2010. log_printf(0, "Error occured while reordering the request body - skipping\n");
  2011. }
  2012. }
  2013. }
  /* Apply the configured custom headers, expanding %HOST%, %BOARD% and %THREAD%
     (a placeholder is left untouched when its replacement string is empty). */
  2014. if (!bbscgi_headers.empty()) {
  2015. for (std::map<std::string, std::string>::iterator it = bbscgi_headers.begin(); it!=bbscgi_headers.end(); it++) {
  2016. /* we cannot use a reference here, because the original string shouldn't be replaced */
  2017. std::string value = it->second;
  2018. if (!hostStr.empty()) {
  2019. std::string::size_type pos = value.find("%HOST%");
  2020. while (pos != std::string::npos) {
  2021. value.replace(pos, 6, hostStr);
  2022. pos = value.find("%HOST%", pos+hostStr.length());
  2023. }
  2024. }
  2025. if (!boardStr.empty()) {
  2026. std::string::size_type pos = value.find("%BOARD%");
  2027. while (pos != std::string::npos) {
  2028. value.replace(pos, 7, boardStr);
  2029. pos = value.find("%BOARD%", pos+boardStr.length());
  2030. }
  2031. }
  2032. if (!threadStr.empty()) {
  2033. std::string::size_type pos = value.find("%THREAD%");
  2034. while (pos != std::string::npos) {
  2035. value.replace(pos, 8, threadStr);
  2036. pos = value.find("%THREAD%", pos+threadStr.length());
  2037. }
  2038. }
  2039. requestHeaders.set(it->first, value);
  2040. log_printf(1, "Appended custom header \"%s: %s\"\n", it->first.c_str(), value.c_str());
  2041. }
  2042. }
  /* At most two attempts: the loop body normally ends with `break`; the only
     `continue` is the retry after a failed transfer with status == 2 (see below).
     Each attempt starts from a fresh copy of the headers and the original body. */
  2043. for (int run=0; run<2; run++) {
  2044. BBS2chProxyHttpHeaders *_headers = new BBS2chProxyHttpHeaders(requestHeaders);
  2045. curl_slist *headersForCurl = NULL;
  2046. char *_body = (char *)requestBody;
  2047. std::string nic, forceProxy;
  2048. long verbose = 0;
  2049. status = 0;
  2050. monaKeyForRequest = "";
  2051. #ifdef USE_LUA
  /* Lua hook: a new interpreter state is created for every attempt and closed
     at lua_end; any failure along the way just skips the hook. */
  2052. if (lua_script) {
  2053. lua_State* l = luaL_newstate();
  2054. luaL_openlibs(l);
  2055. if (luaL_loadfile(l, lua_script) != LUA_OK) {
  2056. log_printf(0, "Lua: Failed to open script %s:\n %s\n", lua_script, lua_tostring(l, -1));
  2057. goto lua_end;
  2058. }
  /* Build the global table `proxy2ch` (helper functions, monaKey, port) and the
     global `HttpHeaders`, then run the loaded chunk so it can define its functions. */
  2059. lua_newtable(l);
  2060. lua_pushcfunction(l, lua_hmacSHA256);
  2061. lua_setfield(l, -2, "hmacSHA256");
  2062. lua_pushcfunction(l, lua_decodeURIComponent);
  2063. lua_setfield(l, -2, "decodeURIComponent");
  2064. lua_pushcfunction(l, lua_encodeURIComponent);
  2065. lua_setfield(l, -2, "encodeURIComponent");
  2066. lua_pushcfunction(l, lua_convertShiftJISToUTF8);
  2067. lua_setfield(l, -2, "convertShiftJISToUTF8");
  2068. lua_pushcfunction(l, lua_isExpiredKey);
  2069. lua_setfield(l, -2, "isExpiredKey");
  2070. lua_pushcfunction(l, lua_isValidAsUTF8);
  2071. lua_setfield(l, -2, "isValidAsUTF8");
  2072. lua_pushcfunction(l, lua_getMonaKey);
  2073. lua_setfield(l, -2, "getMonaKey");
  2074. lua_pushstring(l, keyManager.getKey().c_str());
  2075. lua_setfield(l, -2, "monaKey");
  2076. lua_pushinteger(l, serverPort);
  2077. lua_setfield(l, -2, "port");
  2078. lua_setglobal(l, "proxy2ch");
  2079. BBS2chProxyHttpHeaders::getClassDefinitionForLua(l);
  2080. lua_setglobal(l, "HttpHeaders");
  2081. if (lua_pcall(l, 0, 0, 0) != LUA_OK) {
  2082. log_printf(0, "Lua: Failed to run script %s:\n %s\n", lua_script, lua_tostring(l, -1));
  2083. goto lua_end;
  2084. }
  2085. lua_getglobal(l, "willSendRequestToBbsCgi");
  2086. if (!lua_isfunction(l, -1)) {
  2087. log_printf(0, "Lua: willSendRequestToBbsCgi function does not exist in the script\n");
  2088. goto lua_end;
  2089. }
  /* Call willSendRequestToBbsCgi({headers=..., body=...}, host, board, thread):
     four arguments, one result. */
  2090. lua_newtable(l);
  2091. _headers->getUserdataForLua(l);
  2092. lua_setfield(l, -2, "headers");
  2093. lua_pushstring(l, _body);
  2094. lua_setfield(l, -2, "body");
  2095. lua_pushstring(l, hostStr.c_str());
  2096. lua_pushstring(l, boardStr.c_str());
  2097. lua_pushstring(l, threadStr.c_str());
  2098. if (lua_pcall(l, 4, 1, 0) != LUA_OK) {
  2099. log_printf(0, "Lua: Failed to call willSendRequestToBbsCgi function:\n %s\n", lua_tostring(l, -1));
  2100. goto lua_end;
  2101. }
  2102. if (!lua_istable(l, -1)) {
  2103. log_printf(0, "Lua: A return type of willSendRequestToBbsCgi function should be a table\n");
  2104. goto lua_end;
  2105. }
  /* result.body: a string replaces the request body (heap copy, freed after the attempt). */
  2106. lua_pushstring(l, "body");
  2107. lua_rawget(l, -2);
  2108. if (lua_isstring(l, -1)) {
  2109. const char *newBody = lua_tostring(l, -1);
  2110. _body = strdup(newBody);
  2111. log_printf(1, "Lua: Set request body \"%s\"\n", newBody);
  2112. }
  2113. lua_pop(l, 1);
  /* result.headers: either a plain table of name/value strings, or a userdata
     whose metatable field "_type" is "HttpHeaders". */
  2114. lua_pushstring(l, "headers");
  2115. lua_rawget(l, -2);
  2116. if (lua_istable(l, -1)) {
  2117. delete _headers;
  2118. _headers = new BBS2chProxyHttpHeaders();
  2119. lua_pushnil(l);
  2120. while (lua_next(l, -2)) {
  2121. if (lua_isstring(l, -1) && lua_isstring(l, -2)) {
  2122. const char *name = lua_tostring(l, -2);
  2123. const char *value = lua_tostring(l, -1);
  2124. _headers->add(name, value);
  2125. log_printf(1, "Lua: Set request header \"%s: %s\"\n", name, value);
  2126. }
  2127. lua_pop(l, 1);
  2128. }
  2129. }
  2130. else if (lua_isuserdata(l, -1)) {
  2131. if (lua_getmetatable(l, -1)) {
  2132. if (lua_getfield(l, -1, "_type") == LUA_TSTRING) {
  2133. if (!strcmp(lua_tostring(l, -1), "HttpHeaders")) {
  2134. BBS2chProxyHttpHeaders *newHeaders = *((BBS2chProxyHttpHeaders **)lua_touserdata(l, -3));
  2135. if (newHeaders != _headers) {
  2136. /* remove metatable to prevent the object from garbage collected by lua */
  2137. lua_newtable(l);
  2138. lua_setmetatable(l, -4);
  2139. delete _headers;
  2140. _headers = newHeaders;
  2141. }
  2142. for (std::map<std::string, PBBS2chProxyHttpHeaderEntry>::iterator it = _headers->getMap().begin(); it != _headers->getMap().end(); it++) {
  2143. log_printf(1, "Lua: Set request header \"%s\"\n", it->second->getFull().c_str());
  2144. }
  2145. }
  2146. }
  2147. lua_pop(l, 2);
  2148. }
  2149. }
  2150. lua_pop(l, 1);
  /* result.options: optional "interface" (string), "verbose" (boolean) and
     "proxy" (string), applied to the curl handle further down. */
  2151. lua_pushstring(l, "options");
  2152. lua_rawget(l, -2);
  2153. if (lua_istable(l, -1)) {
  2154. lua_pushstring(l, "interface");
  2155. lua_rawget(l, -2);
  2156. if (lua_isstring(l, -1)) {
  2157. nic = std::string(lua_tostring(l, -1));
  2158. }
  2159. lua_pop(l, 1);
  2160. lua_pushstring(l, "verbose");
  2161. lua_rawget(l, -2);
  2162. if (lua_isboolean(l, -1)) {
  2163. verbose = lua_toboolean(l, -1);
  2164. }
  2165. lua_pop(l, 1);
  2166. lua_pushstring(l, "proxy");
  2167. lua_rawget(l, -2);
  2168. if (lua_isstring(l, -1)) {
  2169. forceProxy = std::string(lua_tostring(l, -1));
  2170. }
  2171. }
  2172. lua_end:
  2173. lua_close(l);
  2174. }
  2175. #endif
  /* do { } while (0) only scopes the locals below; nothing breaks out of it. */
  2176. do {
  2177. bool isPink = hostStr.find("bbspink.com") != std::string::npos;
  2178. bool shouldSign = appKey && (((api_mode & 2) && !isPink) || (api_mode & 4));
  2179. bool shouldConvertBodyToUTF8 = (bbscgi_utf8 == 1 && shouldSign) || (bbscgi_utf8 == 2);
  2180. userAgentForRequest = _headers->get("User-Agent");
  2181. if (userAgentForRequest.empty() && user_agent) userAgentForRequest = user_agent;
  2182. if (_headers->has("X-MonaKey")) {
  2183. monaKeyForRequest = _headers->get("X-MonaKey");
  2184. }
  /* Convert the body to UTF-8 unless the request already carries an X-PostSig
     header; an empty conversion result is treated as "already UTF-8". */
  2185. if (shouldConvertBodyToUTF8 && !_headers->has("X-PostSig")) {
  2186. std::string newBody = convertBodyToUTF8(_body);
  2187. if (!newBody.empty()) {
  2188. if (_body != requestBody) {
  2189. free(_body);
  2190. }
  2191. _body = strdup(newBody.c_str());
  2192. log_printf(1, "Converted request body to UTF-8: %s\n", _body);
  2193. }
  2194. else {
  2195. log_printf(1, "Request body seems already to be UTF-8, will be sent without conversion\n");
  2196. }
  2197. std::string contentType = _headers->get("Content-Type");
  2198. std::transform(contentType.begin(), contentType.end(), contentType.begin(), tolower);
  2199. if (contentType.find("charset=utf-8") == std::string::npos) {
  2200. _headers->set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
  2201. log_printf(1, "Appended header \"Content-Type: application/x-www-form-urlencoded; charset=UTF-8\"\n");
  2202. }
  2203. }
  /* Sign the request here unless a Lua script is configured and X-PostSig is
     already present in the headers. */
  2204. if (shouldSign && (!lua_script || !_headers->has("X-PostSig"))) {
  2205. if (!userAgentForRequest.empty()) {
  2206. monaKeyForRequest = keyManager.getKey(userAgentForRequest);
  2207. appendPostSignature(_body, userAgentForRequest, monaKeyForRequest, _headers);
  2208. } else {
  2209. log_printf(0, "API: User-Agent muse be set explicitly to post with API.\n");
  2210. }
  2211. }
  /* Wait for as long as keyManager asks before posting with this key. */
  2212. if (!monaKeyForRequest.empty()) {
  2213. double wait = keyManager.secondsToWaitBeforePosting(monaKeyForRequest);
  2214. if (wait > 0) {
  2215. log_printf(1, "Sleeping for %.1f seconds to avoid posting too fast...\n", wait);
  2216. #ifdef _WIN32
  2217. Sleep(wait * 1e+3);
  2218. #else
  2219. usleep(wait * 1e+6);
  2220. #endif
  2221. }
  2222. }
  /* A header name followed only by a colon asks libcurl not to add its own
     default for that header (see CURLOPT_HTTPHEADER); done for Expect and
     Accept when the request does not set them. */
  2223. headersForCurl = _headers->appendToCurlSlist(headersForCurl);
  2224. if (!_headers->has("Expect")) headersForCurl = curl_slist_append(headersForCurl, "Expect:");
  2225. if (!_headers->has("Accept")) headersForCurl = curl_slist_append(headersForCurl, "Accept:");
  2226. } while (0);
  2227. if (curl) {
  2228. CURLcode res;
  2229. if (curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  2230. curl_easy_setopt(curl, CURLOPT_URL, url);
  2231. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  2232. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  /* The first attempt uses header_callback_bbscgi, the retry uses
     header_callback_proxy. */
  2233. if (run == 0)
  2234. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_bbscgi);
  2235. else
  2236. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_proxy);
  2237. curl_easy_setopt(curl, CURLOPT_HEADERDATA, this);
  2238. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_proxy);
  2239. curl_easy_setopt(curl, CURLOPT_WRITEDATA, this);
  2240. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  2241. curl_easy_setopt(curl, CURLOPT_POSTFIELDS, _body);
  /* NOTE(review): TLS peer/host verification is switched off for this transfer. */
  2242. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  2243. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  2244. curl_easy_setopt(curl, CURLOPT_VERBOSE, verbose);
  2245. if (force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  2246. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  2247. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headersForCurl);
  2248. if (!nic.empty()) curl_easy_setopt(curl, CURLOPT_INTERFACE, nic.c_str());
  2249. if (user_agent) {
  2250. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  2251. }
  /* A proxy chosen by the Lua script takes precedence over the configured one. */
  2252. if (!forceProxy.empty()) {
  2253. curl_easy_setopt(curl, CURLOPT_PROXY, forceProxy.c_str());
  2254. }
  2255. else if (proxy_server) {
  2256. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  2257. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  2258. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  2259. }
  2260. res = curl_easy_perform(curl);
  2261. if (res != CURLE_OK) {
  /* A write error with status == 2 means the request should be repeated;
     `status` is presumably set by the curl callbacks - confirm. Everything
     owned by this attempt is released before `continue`. */
  2262. if (res == CURLE_WRITE_ERROR && status == 2) {
  2263. log_printf(1, "MonaKey should be reset. Sending the same request automatically...\n");
  2264. curl_easy_reset(curl);
  2265. curl_slist_free_all(headersForCurl);
  2266. delete _headers;
  2267. if (_body != requestBody) free(_body);
  2268. continue;
  2269. }
  2270. else {
  2271. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),url);
  2272. if (!status) sendResponse(503, "Service Unavailable", socketToClient);
  2273. statusCode = 503;
  2274. }
  2275. }
  2276. else {
  /* Finish a chunked response to the client with the terminating zero-size chunk. */
  2277. if (isResponseChunked) {
  2278. socketToClient->writeString("0\r\n\r\n");
  2279. }
  2280. curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode);
  2281. }
  2282. curl_easy_reset(curl);
  2283. }
  /* Normal end of an attempt: release per-attempt resources and leave the loop. */
  2284. curl_slist_free_all(headersForCurl);
  2285. delete _headers;
  2286. if (_body != requestBody) free(_body);
  2287. break;
  2288. }
  2289. return statusCode;
  2290. }
  2291. void BBS2chProxyConnection::compileRegex(void)
  2292. {
  2293. static int compiled;
  2294. if (compiled) return;
  2295. regcomp(&regex, "^https?://([^:/.]+)\\.(2ch\\.net|5ch\\.net|bbspink\\.com)(:[0-9]+)?/([^/]+)/dat/([0-9]+)\\.dat", REG_EXTENDED|REG_ICASE);
  2296. regcomp(&regex_kako, "^https?://([^:/.]+)\\.(2ch\\.net|5ch\\.net|bbspink\\.com)(:[0-9]+)?/([^/]+)/kako/[0-9]+/([0-9]+/)?([0-9]+)\\.dat", REG_EXTENDED|REG_ICASE);
  2297. regcomp(&regex_offlaw, "^https?://([^:/.]+)\\.(2ch\\.net|5ch\\.net|bbspink\\.com)(:[0-9]+)?/test/offlaw2.so\\?.*bbs=([^&]+)", REG_EXTENDED|REG_ICASE);
  2298. regcomp(&regex_api, "^https?://api\\.[25]ch\\.net(:[0-9]+)?/v1/([^/]+)/([^/]+)/([0-9]+)", REG_EXTENDED|REG_ICASE);
  2299. regcomp(&regex_api_auth, "^https?://api\\.[25]ch\\.net(:[0-9]+)?/v1/auth/?$", REG_EXTENDED|REG_ICASE);
  2300. compiled = 1;
  2301. }