// BBS2chProxyConnection.cpp (70 KB)
  1. #include <string>
  2. #include <vector>
  3. #include <map>
  4. #include <set>
  5. #include <sstream>
  6. #include <stdexcept>
  7. #include <algorithm>
  8. #include <pthread.h>
  9. #include <time.h>
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #include <unistd.h>
  13. #ifdef USE_LUA
  14. #include <lua.hpp>
  15. #endif
  16. #ifdef _WIN32
  17. #include <fcntl.h>
  18. #include <winsock2.h>
  19. #include <ws2tcpip.h>
  20. #include <mswsock.h>
  21. #define CLOSESOCKET(x) closesocket(x)
  22. #else
  23. #include <sys/socket.h>
  24. #include <netinet/in.h>
  25. #include <netdb.h>
  26. #include <arpa/inet.h>
  27. #include <poll.h>
  28. #define CLOSESOCKET(x) close(x)
  29. #endif
  30. #include "BBS2chProxyConnection.h"
  31. #include "DataStorage.h"
  32. #include "hmac.h"
  33. #include "stringEncodingConverter.h"
  34. #include "BBS2chProxyRawSocket.h"
  35. #ifdef USE_MITM
  36. #include "BBS2chProxySecureSocket.h"
  37. #endif
  38. //#define DEBUG 1
  39. extern char *proxy_server;
  40. extern long proxy_port;
  41. extern long proxy_type;
  42. extern long timeout;
  43. extern char *user_agent;
  44. extern char *api_ua_dat;
  45. extern char *x_2ch_ua_dat;
  46. extern char *appKey;
  47. extern char *hmacKey;
  48. extern int allow_chunked;
  49. extern int curl_features;
  50. extern unsigned int curl_version_number;
  51. extern bool accept_https;
  52. extern int force_5chnet;
  53. extern int force_5chnet_https;
  54. extern int force_ipv4;
  55. extern char *bbsmenu_url;
  56. extern char *api_server;
  57. extern std::map<std::string, std::string> bbscgi_headers;
  58. extern int gikofix;
  59. extern CURLSH *curl_share;
  60. extern char *lua_script;
  61. extern unsigned int api_mode;
  62. extern std::vector<std::string> bbscgi_postorder;
  63. extern unsigned int bbscgi_utf8;
  64. extern int api_override;
  65. #ifdef USE_MITM
  66. extern unsigned int mitm_mode;
  67. #endif
  68. extern void log_printf(int level, const char *format ...);
  69. #include "utils.h"
  70. BBS2chProxyKeyManager BBS2chProxyConnection::keyManager;
  71. BBS2chProxyAuth BBS2chProxyConnection::auth;
  72. static regex_t regex;
  73. static regex_t regex_kako;
  74. static regex_t regex_offlaw;
  75. static regex_t regex_api;
  76. static regex_t regex_api_auth;
  77. #ifdef USE_LUA
  78. extern "C" {
  79. static int lua_hmacSHA256(lua_State *l)
  80. {
  81. static const char *table = "0123456789abcdef";
  82. size_t keyLength, dataLength;
  83. const char *key = luaL_checklstring(l, 1, &keyLength);
  84. const char *data = luaL_checklstring(l, 2, &dataLength);
  85. if (!key || !data) return 0;
  86. unsigned char digest[32];
  87. char digestStr[65];
  88. proxy2ch_HMAC_SHA256(key, keyLength, data, dataLength, digest);
  89. for (int i=0; i<32; i++) {
  90. unsigned char c = digest[i];
  91. unsigned char upper = (c >> 4) & 0xf;
  92. unsigned char lower = c & 0xf;
  93. digestStr[i*2] = table[upper];
  94. digestStr[i*2+1] = table[lower];
  95. }
  96. digestStr[64] = 0;
  97. lua_pushstring(l, digestStr);
  98. return 1;
  99. }
  100. static int lua_decodeURIComponent(lua_State *l)
  101. {
  102. size_t length;
  103. const char *input = luaL_checklstring(l, 1, &length);
  104. if (!input) return 0;
  105. bool decodePlus = true;
  106. if (!lua_isnoneornil(l, 2)) {
  107. decodePlus = (lua_toboolean(l, 2));
  108. }
  109. std::string output = decodeURIComponent(input, length, decodePlus);
  110. lua_pushstring(l, output.c_str());
  111. return 1;
  112. }
  113. static int lua_encodeURIComponent(lua_State *l)
  114. {
  115. size_t length;
  116. const char *input = luaL_checklstring(l, 1, &length);
  117. if (!input) return 0;
  118. bool spaceAsPlus = true;
  119. if (!lua_isnoneornil(l, 2)) {
  120. spaceAsPlus = (lua_toboolean(l, 2));
  121. }
  122. std::string output = encodeURIComponent(input, length, spaceAsPlus);
  123. lua_pushstring(l, output.c_str());
  124. return 1;
  125. }
  126. static int lua_convertShiftJISToUTF8(lua_State *l)
  127. {
  128. size_t length;
  129. const char *input = luaL_checklstring(l, 1, &length);
  130. if (!input) return 0;
  131. if (length > 0) {
  132. char *output = convertShiftJISToUTF8(input, length);
  133. if (!output) lua_pushnil(l);
  134. else {
  135. lua_pushstring(l, output);
  136. free(output);
  137. }
  138. }
  139. else lua_pushstring(l, "");
  140. return 1;
  141. }
  142. static int lua_isExpiredKey(lua_State *l)
  143. {
  144. size_t length;
  145. const char *input = luaL_checklstring(l, 1, &length);
  146. if (!input) return 0;
  147. if (BBS2chProxyConnection::keyManager.isExpired(input)) {
  148. lua_pushboolean(l, 1);
  149. }
  150. else lua_pushboolean(l, 0);
  151. return 1;
  152. }
  153. static int lua_isValidAsUTF8(lua_State *l)
  154. {
  155. size_t length;
  156. const char *input = luaL_checklstring(l, 1, &length);
  157. if (!input) return 0;
  158. lua_pushboolean(l, isValidAsUTF8(input, length));
  159. return 1;
  160. }
  161. static int lua_getMonaKey(lua_State *l)
  162. {
  163. size_t length;
  164. const char *input = luaL_checklstring(l, 1, &length);
  165. if (!input) return 0;
  166. const std::string &key = BBS2chProxyConnection::keyManager.getKey(input);
  167. lua_pushstring(l, key.c_str());
  168. return 1;
  169. }
  170. }
  171. #endif
  172. void BBS2chProxyConnection::run(void * (*func)(void *))
  173. {
  174. pthread_t thread;
  175. pthread_attr_t thread_attr;
  176. pthread_attr_init(&thread_attr);
  177. pthread_attr_setdetachstate(&thread_attr , PTHREAD_CREATE_DETACHED);
  178. if(0 != pthread_create(&thread , &thread_attr , func , this))
  179. perror("pthread_create");
  180. pthread_attr_destroy(&thread_attr);
  181. }
  182. struct TunnelSockets {
  183. int sock_c;
  184. int sock_s;
  185. std::string addr;
  186. int port;
  187. };
  188. static void *tunnelMain(void *param)
  189. {
  190. TunnelSockets *sockets = (TunnelSockets *)param;
  191. char *buf = new char[16384];
  192. #ifdef _WIN32
  193. fd_set fds;
  194. int nfds = sockets->sock_c > sockets->sock_s ? sockets->sock_c + 1 : sockets->sock_s + 1;
  195. #else
  196. struct pollfd fds[2];
  197. memset(fds, 0, sizeof(fds));
  198. fds[0].fd = sockets->sock_c;
  199. fds[0].events = POLLIN;
  200. fds[1].fd = sockets->sock_s;
  201. fds[1].events = POLLIN;
  202. #endif
  203. while (1) {
  204. #ifdef _WIN32
  205. FD_ZERO(&fds);
  206. FD_SET(sockets->sock_c, &fds);
  207. FD_SET(sockets->sock_s, &fds);
  208. if (select(nfds, &fds, NULL, NULL, NULL) < 0) break;
  209. if (FD_ISSET(sockets->sock_c, &fds)) {
  210. int ret = recv(sockets->sock_c, buf, 16384, 0);
  211. if (ret > 0) send(sockets->sock_s, buf, ret, 0);
  212. else if (ret <= 0) break;
  213. }
  214. if (FD_ISSET(sockets->sock_s, &fds)) {
  215. int ret = recv(sockets->sock_s, buf, 16384, 0);
  216. if (ret > 0) send(sockets->sock_c, buf, ret, 0);
  217. else if (ret <= 0) break;
  218. }
  219. #else
  220. if (poll(fds, 2, -1) < 0) break;
  221. if (fds[0].revents & POLLIN) {
  222. int ret = recv(sockets->sock_c, buf, 16384, 0);
  223. if (ret > 0) send(sockets->sock_s, buf, ret, 0);
  224. else if (ret <= 0) break;
  225. }
  226. else if (fds[0].revents != 0) break;
  227. if (fds[1].revents & POLLIN) {
  228. int ret = recv(sockets->sock_s, buf, 16384, 0);
  229. if (ret > 0) send(sockets->sock_c, buf, ret, 0);
  230. else if (ret <= 0) break;
  231. }
  232. else if (fds[1].revents != 0) break;
  233. #endif
  234. }
  235. CLOSESOCKET(sockets->sock_c);
  236. CLOSESOCKET(sockets->sock_s);
  237. log_printf(1, "Finished tunneling to %s:%d\n", sockets->addr.c_str(), sockets->port);
  238. delete sockets;
  239. delete[] buf;
  240. return NULL;
  241. }
  242. int BBS2chProxyConnection::tunnel(const char *addr, int port)
  243. {
  244. struct sockaddr_in server;
  245. memset(&server, 0, sizeof(server));
  246. server.sin_family = AF_INET;
  247. server.sin_addr.s_addr = inet_addr(addr);
  248. server.sin_port = htons(port);
  249. if(server.sin_addr.s_addr == 0xffffffff) {
  250. struct hostent *host;
  251. host = gethostbyname(addr);
  252. if (host == NULL) {
  253. log_printf(0, "Failed to lookup hostname %s\n", addr);
  254. sendResponse(400, "Bad Request", socketToClient);
  255. return 400;
  256. }
  257. server.sin_addr.s_addr = *(unsigned int *)host->h_addr_list[0];
  258. }
  259. log_printf(1,"Tunneling connection to %s:%d\n",addr,port);
  260. int sock_s = socket(AF_INET, SOCK_STREAM, 0);
  261. if(-1 == ::connect(sock_s, (struct sockaddr *)&server, sizeof(server))) {
  262. perror("connect");
  263. sendResponse(400, "Bad Request", socketToClient);
  264. return 400;
  265. }
  266. send(sock_c, "HTTP/1.1 200 Connection established\r\n\r\n", 39, 0);
  267. TunnelSockets *sockets = new TunnelSockets();
  268. sockets->sock_c = sock_c;
  269. sockets->sock_s = sock_s;
  270. sockets->addr = addr;
  271. sockets->port = port;
  272. pthread_t thread;
  273. if(0 != pthread_create(&thread, NULL, tunnelMain, sockets))
  274. perror("pthread_create");
  275. pthread_detach(thread);
  276. return 0;
  277. }
  278. void BBS2chProxyConnection::connect(void)
  279. {
  280. char method[32], url[1024], protocol[32];
  281. int i;
  282. char *buf, *ptr;
  283. unsigned int datProxyMode = 0; // 0: no dat, 1: read.cgi or API, 2: force API, 3: kakolog
  284. regmatch_t match[7];
  285. long statusCode = 0;
  286. BBS2chProxyURL baseURL;
  287. BBS2chProxyHttpHeaders requestHeaders;
  288. socketToClient = new BBS2chProxyRawSocket(sock_c);
  289. buf = (char *)malloc(16384);
  290. if(!buf) goto end;
  291. beginHandleRequest:
  292. ptr = buf;
  293. if(!socketToClient->readLine(buf, 1024)) {
  294. sendResponse(400, "Bad Request", socketToClient);
  295. statusCode = 400;
  296. goto end;
  297. }
  298. i=0;
  299. while(*ptr != ' ' && *ptr != 0 && i < 32) method[i++] = *ptr++;
  300. if(*ptr == 0 || i == 32) {
  301. sendResponse(400, "Bad Request", socketToClient);
  302. statusCode = 400;
  303. goto end;
  304. }
  305. method[i] = 0;
  306. ptr++;
  307. i=0;
  308. while(*ptr != ' ' && *ptr != 0 && i < 1024) url[i++] = *ptr++;
  309. if(*ptr == 0 || i == 1024) {
  310. sendResponse(400, "Bad Request", socketToClient);
  311. statusCode = 400;
  312. goto end;
  313. }
  314. url[i] = 0;
  315. ptr++;
  316. i=0;
  317. while(*ptr != '\r' && *ptr != '\n' && *ptr != 0 && i < 32) protocol[i++] = *ptr++;
  318. if(*ptr == 0 || i == 32) {
  319. sendResponse(400, "Bad Request", socketToClient);
  320. statusCode = 400;
  321. goto end;
  322. }
  323. protocol[i] = 0;
  324. if(!strncasecmp(protocol,"HTTP/1.0",8)) {
  325. isClientHttp1_0 = true;
  326. }
  327. else isClientHttp1_0 = false;
  328. log_printf(1, "Received %s %s %s\n",method,url,protocol);
  329. if(strcasecmp(method,"GET") && strcasecmp(method,"POST") && strcasecmp(method,"HEAD") && strcasecmp(method,"CONNECT") && strcasecmp(method,"PUT") && strcasecmp(method, "OPTIONS")) {
  330. sendResponse(400, "Bad Request", socketToClient);
  331. statusCode = 400;
  332. goto end;
  333. }
  334. if(!url[0]) {
  335. sendResponse(400, "Bad Request", socketToClient);
  336. statusCode = 400;
  337. goto end;
  338. }
  339. if(strncasecmp(protocol,"HTTP",4)) {
  340. sendResponse(400, "Bad Request", socketToClient);
  341. statusCode = 400;
  342. goto end;
  343. }
  344. if(!strcasecmp(method,"CONNECT")) {
  345. if(!accept_https || baseURL.isValid()) {
  346. sendResponse(400, "Bad Request", socketToClient);
  347. statusCode = 400;
  348. goto end;
  349. }
  350. while(socketToClient->readLine(buf, 16384)) {
  351. if(!strcmp("\r\n",buf)) break;
  352. }
  353. int port = 443;
  354. char *ptr = strchr(url, ':');
  355. if(ptr) {
  356. *ptr = 0;
  357. port = atoi(ptr+1);
  358. }
  359. #ifdef USE_MITM
  360. bool useMITM = false;
  361. if (mitm_mode) {
  362. baseURL = BBS2chProxyURL("https", url);
  363. if (mitm_mode == 2) useMITM = true;
  364. else if (mitm_mode == 1 && baseURL.isFamilyOf5chNet()) useMITM = true;
  365. }
  366. if (useMITM) {
  367. socketToClient->writeString("HTTP/1.1 200 Connection established\r\n\r\n");
  368. if (port == 80) {
  369. baseURL.setScheme("http");
  370. goto beginHandleRequest;
  371. }
  372. else {
  373. try {
  374. BBS2chProxySecureSocket *secureSocket = new BBS2chProxySecureSocket(sock_c, url);
  375. delete socketToClient;
  376. socketToClient = secureSocket;
  377. if (port != 443) baseURL.setPort(port);
  378. isHttps = true;
  379. goto beginHandleRequest;
  380. } catch (const std::runtime_error& e) {
  381. log_printf(0, "%s\n", e.what());
  382. sendResponse(400, "Bad Request", socketToClient);
  383. statusCode = 400;
  384. goto end;
  385. }
  386. }
  387. }
  388. else
  389. #endif
  390. {
  391. statusCode = tunnel(url, port);
  392. /* if a return value is non-zero, tunnel function failed to establish a tunnelling connection */
  393. if (statusCode == 0) {
  394. delete socketToClient;
  395. socketToClient = NULL;
  396. }
  397. goto end;
  398. }
  399. }
  400. #if USE_MITM
  401. if (baseURL.isValid()) {
  402. requestURL = BBS2chProxyURL(baseURL, url);
  403. log_printf(1, "Running as MITM proxy for %s\n", requestURL.absoluteString().c_str());
  404. } else
  405. #endif
  406. requestURL = BBS2chProxyURL(url);
  407. if (!requestURL.isHttp()) {
  408. sendResponse(400, "Bad Request", socketToClient);
  409. statusCode = 400;
  410. goto end;
  411. }
  412. if (force_5chnet) {
  413. if (requestURL.getHost() != "menu.2ch.net" && requestURL.replaceHost("2ch.net", "5ch.net")) {
  414. force5ch = true;
  415. log_printf(1, "Detected *.2ch.net URL, changed target URL to %s\n", requestURL.absoluteString().c_str());
  416. }
  417. }
  418. /* parse request headers */
  419. while (socketToClient->readLine(buf, 16384)) {
  420. if (!strcmp("\r\n",buf)) break;
  421. requestHeaders.add(buf);
  422. }
  423. if (requestHeaders.hasNameAndValue("Transfer-Encoding", "chunked")) {
  424. isClientChunked = true;
  425. }
  426. else if (requestHeaders.has("Content-Length")) {
  427. content_length = atoi(requestHeaders.get("Content-Length").c_str());
  428. }
  429. if (requestHeaders.has("Expect")) {
  430. if (!strcasecmp(requestHeaders.get("Expect").c_str(), "100-continue") && !isClientHttp1_0) {
  431. log_printf(1, "Received Expect: 100-continue header, sending 100 Continue response to the client\n");
  432. socketToClient->writeString("HTTP/1.1 100 Continue\r\n\r\n");
  433. }
  434. }
  435. if (regexec(&regex, requestURL.absoluteString().c_str(), 6, match, 0) != REG_NOMATCH) {
  436. if ((appKey && (api_mode & 1)) || !requestURL.hostStartsWith("headline.")) datProxyMode = 1;
  437. }
  438. else if (regexec(&regex_kako, requestURL.absoluteString().c_str(), 7, match, 0) != REG_NOMATCH) {
  439. datProxyMode = 3;
  440. }
  441. else if (regexec(&regex_offlaw, requestURL.absoluteString().c_str(), 5, match, 0) != REG_NOMATCH) {
  442. const char *tmp = requestURL.absoluteString().c_str();
  443. const char *thread = strstr(tmp, "key=");
  444. if (thread) {
  445. match[6].rm_so = thread+4-tmp;
  446. match[6].rm_eo = thread+4-tmp;
  447. const char *ptr = thread+4;
  448. while (*ptr != '&' && *ptr != 0) {
  449. ptr++;
  450. match[6].rm_eo++;
  451. }
  452. if (match[6].rm_so != match[6].rm_eo) datProxyMode = 3;
  453. }
  454. }
  455. else if (api_override && appKey) {
  456. if (regexec(&regex_api, requestURL.absoluteString().c_str(), 5, match, 0) != REG_NOMATCH) {
  457. datProxyMode = 2;
  458. match[1] = match[2];
  459. match[5] = match[4];
  460. match[4] = match[3];
  461. }
  462. else if (regexec(&regex_api_auth, requestURL.absoluteString().c_str(), 2, match, 0) != REG_NOMATCH) {
  463. /* return dummy response immediately */
  464. log_printf(1, "Returning dummy response because API overriding is enabled\n");
  465. statusCode = 200;
  466. sendBasicHeaders(200, "OK", socketToClient);
  467. if (0 >= socketToClient->writeString("Content-Type: text/plain\r\n")) goto end;
  468. if (0 >= socketToClient->writeString("Content-Length: 203\r\n\r\n")) goto end;
  469. if (0 >= socketToClient->writeString("SESSION-ID=Monazilla/1.00:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000")) goto end;
  470. goto end;
  471. }
  472. }
  473. if (datProxyMode) {
  474. // match[1] : hostPrefix
  475. // match[2] : host
  476. // match[4] : bbs
  477. // match[5 or 6] : key
  478. const char *original = requestURL.absoluteString().c_str();
  479. const regmatch_t *threadMatch = datProxyMode != 3 ? match+5 : match+6;
  480. std::string board(original + match[4].rm_so, match[4].rm_eo - match[4].rm_so);
  481. std::string thread(original + threadMatch->rm_so, threadMatch->rm_eo - threadMatch->rm_so);
  482. if (!appKey || (datProxyMode == 1 && !(api_mode & 1)) || datProxyMode == 3) {
  483. log_printf(1, "Retrieving thread via read.cgi...\n");
  484. std::string host(original + match[1].rm_so, match[2].rm_eo - match[1].rm_so);
  485. threadKey = host;
  486. threadKey += '/';
  487. threadKey += board;
  488. threadKey += '/';
  489. threadKey += thread;
  490. std::string targetURL = (force_5chnet_https || isHttps) ? "https://" : "http://";
  491. targetURL += host;
  492. targetURL += "/test/read.cgi/";
  493. targetURL += board;
  494. targetURL += '/';
  495. targetURL += thread;
  496. targetURL += '/';
  497. if (force_5chnet_https) isHttps = true;
  498. statusCode = datProxy(targetURL.c_str(), method, requestHeaders);
  499. }
  500. else {
  501. log_printf(1, "Retrieving thread via API...\n");
  502. std::string hostPrefix(original + match[1].rm_so, match[1].rm_eo - match[1].rm_so);
  503. std::string targetURL = "https://";
  504. targetURL += api_server;
  505. targetURL += "/v1/";
  506. targetURL += hostPrefix;
  507. targetURL += '/';
  508. targetURL += board;
  509. targetURL += '/';
  510. targetURL += thread;
  511. isHttps = true;
  512. statusCode = datProxyAPI(targetURL.c_str(), method, requestHeaders);
  513. }
  514. }
  515. else {
  516. if (force_5chnet_https && !isHttps && requestURL.isFamilyOf5chNet()) {
  517. requestURL.setScheme("https");
  518. isHttps = true;
  519. log_printf(1, "The host %s is 5ch.net family, connecting with HTTPS\n", requestURL.getHost().c_str());
  520. }
  521. if (bbsmenu_url && requestURL.equals(BBS2chProxyURL(bbsmenu_url), true)) {
  522. log_printf(1, "Running as a BBS menu proxy...\n");
  523. statusCode = bbsmenuProxy(requestURL.absoluteString().c_str(), method, requestHeaders);
  524. }
  525. else {
  526. bool isPostRequest = !strcasecmp(method, "POST");
  527. bool isPutRequest = !strcasecmp(method, "PUT");
  528. if (isPostRequest && requestURL.isFamilyOf5chNet() && requestURL.pathStartsWith("/test/bbs.cgi")) bbscgi = true;
  529. if (bbscgi) log_printf(1, "Looks like a request to bbs.cgi, will be modified before sending...\n");
  530. else log_printf(1, "Not a notable request, will be forwarded to server...\n");
  531. if (force_5chnet) {
  532. if (requestHeaders.has("Host")) {
  533. std::string host = requestHeaders.get("Host");
  534. size_t pos = host.find("2ch.net");
  535. if (pos != std::string::npos && pos+7 == host.length()) {
  536. if (pos == 0 || host[pos-1] == '.') {
  537. host.replace(pos, 1, "5");
  538. requestHeaders.set("Host", host);
  539. }
  540. }
  541. }
  542. if (bbscgi && requestHeaders.has("Referer")) {
  543. std::string referrer = requestHeaders.get("Referer");
  544. size_t pos = referrer.find("2ch.net");
  545. if (pos != std::string::npos) {
  546. if (pos == 0 || referrer[pos-1] == '.') {
  547. referrer.replace(pos, 1, "5");
  548. requestHeaders.set("Referer", referrer);
  549. }
  550. }
  551. }
  552. }
  553. requestHeaders.remove("Connection");
  554. if (user_agent) requestHeaders.remove("User-Agent");
  555. if (bbscgi && (content_length > 0 || isClientChunked)) {
  556. bool isNotFormURLEncoded = false;
  557. if (requestHeaders.has("Content-Type") && requestHeaders.get("Content-Type").find("application/x-www-form-urlencoded") == std::string::npos) {
  558. isNotFormURLEncoded = true;
  559. }
  560. if (!isNotFormURLEncoded) {
  561. requestHeaders.remove("Content-Length");
  562. if (!bbscgi_headers.empty()) {
  563. for (std::map<std::string, std::string>::iterator it = bbscgi_headers.begin(); it != bbscgi_headers.end(); it++) {
  564. if (requestHeaders.has(it->first)) {
  565. log_printf(1, "Ignoring header \"%s\" because custom header will be appended\n", it->first.c_str());
  566. requestHeaders.remove(it->first);
  567. }
  568. }
  569. }
  570. char *postdata = NULL;
  571. if (isClientChunked) {
  572. content_length = readChunkedBodyIntoBuffer(&postdata, socketToClient);
  573. requestHeaders.remove("Transfer-Encoding");
  574. }
  575. else {
  576. postdata = (char *)calloc(content_length+1, 1);
  577. content_length = socketToClient->read(postdata, content_length);
  578. }
  579. if (gikofix) {
  580. char *ptr = postdata+content_length-1;
  581. while (ptr >= postdata && (*ptr == '\r' || *ptr == '\n')) {
  582. *ptr-- = 0;
  583. }
  584. }
  585. curl_slist *headersForCurl = NULL;
  586. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl);
  587. statusCode = bbsCgiProxy(requestURL.absoluteString().c_str(), requestHeaders, postdata);
  588. free(postdata);
  589. curl_slist_free_all(headersForCurl);
  590. goto end;
  591. }
  592. }
  593. curl_slist *headersForCurl = NULL;
  594. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl);
  595. if (!requestHeaders.has("Expect")) {
  596. headersForCurl = curl_slist_append(headersForCurl, "Expext:");
  597. }
  598. if(curl) {
  599. CURLcode res;
  600. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  601. curl_easy_setopt(curl, CURLOPT_URL, requestURL.absoluteString().c_str());
  602. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  603. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  604. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_proxy);
  605. curl_easy_setopt(curl, CURLOPT_HEADERDATA, this);
  606. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_proxy);
  607. curl_easy_setopt(curl, CURLOPT_WRITEDATA, this);
  608. if(content_length) {
  609. /* set Content-Length explicitly via API to work properly with curl >= 7.66.0 */
  610. if(isPostRequest)
  611. curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, content_length);
  612. else if(isPutRequest)
  613. curl_easy_setopt(curl, CURLOPT_INFILESIZE, content_length);
  614. }
  615. curl_easy_setopt(curl, CURLOPT_READFUNCTION, read_callback_proxy);
  616. curl_easy_setopt(curl, CURLOPT_READDATA, this);
  617. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  618. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  619. //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
  620. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  621. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  622. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headersForCurl);
  623. if(user_agent) {
  624. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  625. }
  626. if(isPostRequest) {
  627. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  628. }
  629. else if(isPutRequest) {
  630. curl_easy_setopt(curl, CURLOPT_UPLOAD, 1L);
  631. }
  632. else if(!strcasecmp(method, "HEAD")) {
  633. curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
  634. }
  635. else if(!strcasecmp(method, "OPTIONS")) {
  636. curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "OPTIONS");
  637. }
  638. if(proxy_server) {
  639. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  640. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  641. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  642. }
  643. res = curl_easy_perform(curl);
  644. if(res != CURLE_OK) {
  645. log_printf(0, "curl error: %s (%s)\n", curl_easy_strerror(res), requestURL.absoluteString().c_str());
  646. if(!status) sendResponse(503, "Service Unavailable", socketToClient);
  647. statusCode = 503;
  648. }
  649. else {
  650. if(isResponseChunked) {
  651. socketToClient->writeString("0\r\n\r\n");
  652. }
  653. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  654. }
  655. curl_easy_reset(curl);
  656. }
  657. curl_slist_free_all(headersForCurl);
  658. }
  659. }
  660. end:
  661. if(statusCode) log_printf(1, "Returned status code %d to client\n",statusCode);
  662. if(buf) free(buf);
  663. if(socketToClient) socketToClient->close();
  664. }
  665. int BBS2chProxyConnection::datProxy(const char *url, const char *method, BBS2chProxyHttpHeaders &requestHeaders)
  666. {
  667. DataStorage *html = NULL;
  668. long statusCode = 0;
  669. long rangeStart = 0, rangeEnd = 0;
  670. time_t lastModified = 0;
  671. time_t ifModifiedSince = 0;
  672. char *buf = (char *)malloc(16384);
  673. if(!buf) goto last;
  674. if(requestHeaders.has("Range")) {
  675. std::string value = requestHeaders.get("Range");
  676. if(value.find("bytes=") == 0 && value.find(",") == std::string::npos) {
  677. char *ptr = (char *)value.c_str() + 6;
  678. if(*ptr == '-') {
  679. rangeStart = atoi(ptr);
  680. }
  681. else {
  682. rangeStart = strtol(ptr, &ptr, 10);
  683. if(*ptr == '-') ptr++;
  684. if(*ptr && *ptr != '\r') {
  685. rangeEnd = strtol(ptr, NULL, 10);
  686. if(rangeEnd && rangeStart > rangeEnd) {
  687. sendResponse(416, "Requested range not satisfiable", socketToClient);
  688. statusCode = 416;
  689. goto last;
  690. }
  691. }
  692. }
  693. //fprintf(stderr, "range=%ld-%ld\n",rangeStart,rangeEnd);
  694. }
  695. else {
  696. sendResponse(416, "Requested range not satisfiable", socketToClient);
  697. statusCode = 416;
  698. goto last;
  699. }
  700. }
  701. if(requestHeaders.has("If-Modified-Since")) {
  702. struct tm time_ = {};
  703. strptime(requestHeaders.get("If-Modified-Since").c_str(), httpTimestampFmt, &time_);
  704. ifModifiedSince = mktime(&time_);
  705. }
  706. if(rangeStart > 0) {
  707. PBBS2chProxyThreadInfo info;
  708. pthread_mutex_lock(mutex);
  709. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  710. if(it != threadCache->end()) {
  711. info = it->second;
  712. }
  713. pthread_mutex_unlock(mutex);
  714. log_printf(5,"range request from %ld bytes\n",rangeStart);
  715. if(info) {
  716. int from = info->lastResNum;
  717. int alreadyRead = info->cachedSize;
  718. int lastResLength = info->cachedData->length;
  719. log_printf(5,"hit %s: cached %d bytes, last res size %d\n",threadKey.c_str(),alreadyRead,lastResLength);
  720. if(rangeStart <= alreadyRead && rangeStart >= alreadyRead - lastResLength) {
  721. if(curl) {
  722. CURLcode res;
  723. DataStorage *dat = new DataStorage();
  724. log_printf(5,"partial access from res num %d\n",from);
  725. snprintf(buf,16384,"%s%d-n",url,from);
  726. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  727. curl_easy_setopt(curl, CURLOPT_URL, buf);
  728. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  729. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  730. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  731. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  732. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  733. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  734. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  735. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  736. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  737. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  738. if(proxy_server) {
  739. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  740. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  741. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  742. }
  743. if(user_agent) {
  744. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  745. }
  746. else if(requestHeaders.has("User-Agent")) {
  747. curl_easy_setopt(curl, CURLOPT_USERAGENT, requestHeaders.get("User-Agent").c_str());
  748. }
  749. res = curl_easy_perform(curl);
  750. if(res == CURLE_OK) {
  751. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  752. curl_easy_reset(curl);
  753. if(statusCode == 200 && dat->length) {
  754. DataStorage *updated = html2dat(dat, from, &lastModified, true);
  755. if(ifModifiedSince && lastModified && updated && updated->length == lastResLength) {
  756. struct tm time_ = {};
  757. gmtime_r(&lastModified,&time_);
  758. time_t tmp = mktime(&time_);
  759. if(ifModifiedSince >= tmp) {
  760. sendResponse(304, "Not Modified", socketToClient);
  761. log_printf(5,"not modified!\n");
  762. delete updated;
  763. delete dat;
  764. statusCode = 304;
  765. goto last;
  766. }
  767. }
  768. if(updated && updated->length && updated->length >= lastResLength) {
  769. html = new DataStorage(alreadyRead - lastResLength);
  770. html->appendBytes(updated->bytes, updated->length);
  771. if(!rangeEnd) rangeEnd = html->length - 1;
  772. if(rangeStart > rangeEnd) {
  773. sendResponse(416, "Requested range not satisfiable", socketToClient);
  774. delete updated;
  775. delete dat;
  776. statusCode = 416;
  777. goto last;
  778. }
  779. statusCode = 206;
  780. log_printf(5,"cache hit; reconstructed data length:%ld\n",(long)html->length);
  781. }
  782. else {
  783. log_printf(5,"cache misshit?\n");
  784. sendResponse(416, "Requested range not satisfiable", socketToClient);
  785. delete updated;
  786. delete dat;
  787. statusCode = 416;
  788. goto last;
  789. }
  790. delete updated;
  791. }
  792. }
  793. else {
  794. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),buf);
  795. curl_easy_reset(curl);
  796. }
  797. delete dat;
  798. if(html) goto resp;
  799. }
  800. }
  801. else {
  802. log_printf(5,"invalid cache contents\n");
  803. pthread_mutex_lock(mutex);
  804. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  805. if(it != threadCache->end()) {
  806. threadCache->erase(it);
  807. }
  808. pthread_mutex_unlock(mutex);
  809. }
  810. }
  811. }
  812. {
  813. if(curl) {
  814. CURLcode res;
  815. DataStorage *dat = new DataStorage();
  816. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  817. curl_easy_setopt(curl, CURLOPT_URL, url);
  818. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  819. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  820. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  821. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  822. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  823. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  824. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  825. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  826. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  827. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  828. if(proxy_server) {
  829. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  830. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  831. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  832. }
  833. if(user_agent) {
  834. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  835. }
  836. else if(requestHeaders.has("User-Agent")) {
  837. curl_easy_setopt(curl, CURLOPT_USERAGENT, requestHeaders.get("User-Agent").c_str());
  838. }
  839. res = curl_easy_perform(curl);
  840. if(res != CURLE_OK) {
  841. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),url);
  842. sendResponse(503, "Service Unavailable", socketToClient);
  843. curl_easy_reset(curl);
  844. delete dat;
  845. statusCode = 503;
  846. goto last;
  847. }
  848. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  849. curl_easy_reset(curl);
  850. if(statusCode == 200) {
  851. html = html2dat(dat, 1, &lastModified, false);
  852. }
  853. delete dat;
  854. }
  855. if(!html || !html->length) {
  856. sendResponse(503, "Service Unavailable", socketToClient);
  857. statusCode = 503;
  858. goto last;
  859. }
  860. if((rangeStart || rangeEnd) && html && html->length) {
  861. if(!rangeEnd) rangeEnd = html->length - 1;
  862. if(rangeStart < 0) rangeStart = html->length + rangeStart;
  863. if(rangeStart < html->length && rangeEnd < html->length && rangeStart <= rangeEnd) {
  864. statusCode = 206;
  865. }
  866. else {
  867. if(ifModifiedSince && lastModified && rangeStart == html->length) {
  868. struct tm time_ = {};
  869. gmtime_r(&lastModified,&time_);
  870. time_t tmp = mktime(&time_);
  871. if(ifModifiedSince >= tmp) {
  872. sendResponse(304, "Not Modified", socketToClient);
  873. log_printf(5,"not modified!\n");
  874. statusCode = 304;
  875. goto last;
  876. }
  877. }
  878. sendResponse(416, "Requested range not satisfiable", socketToClient);
  879. statusCode = 416;
  880. goto last;
  881. }
  882. }
  883. }
  884. resp:
  885. if(statusCode == 206) sendBasicHeaders(statusCode,"Partial Content",socketToClient);
  886. else sendBasicHeaders(statusCode,"OK",socketToClient);
  887. if(0 >= socketToClient->writeString("Content-Type: text/plain\r\n")) goto last;
  888. if(0 >= socketToClient->writeString("Accept-Ranges: bytes\r\n")) goto last;
  889. if(statusCode == 206) {
  890. std::ostringstream ss;
  891. ss << "Content-Range: bytes " << rangeStart << "-" << rangeEnd << "/" << html->length << "\r\n";
  892. if (0 >= socketToClient->writeString(ss.str())) goto last;
  893. //fprintf(stderr,"Content-Length: %ld\r\n",rangeEnd - rangeStart + 1);
  894. //fprintf(stderr,"Content-Range: bytes %ld-%ld/%ld\r\n",rangeStart,rangeEnd,(long)html->length);
  895. DataStorage *newHtml = new DataStorage();
  896. newHtml->appendBytes(html->bytes+rangeStart, rangeEnd - rangeStart + 1);
  897. delete html;
  898. html = newHtml;
  899. }
  900. {
  901. std::ostringstream ss;
  902. ss << "Content-Length: " << html->length << "\r\n";
  903. if(0 >= socketToClient->writeString(ss.str())) goto last;
  904. }
  905. if(lastModified) {
  906. struct tm time_ = {};
  907. char date[256];
  908. gmtime_r(&lastModified,&time_);
  909. strftime(date,256,httpTimestampFmt,&time_);
  910. std::string header = "Last-Modified: ";
  911. header += date;
  912. header += "\r\n";
  913. if(0 >= socketToClient->writeString(header)) goto last;
  914. //fprintf(stderr,"Last-Modified: %s\r\n",date);
  915. }
  916. if(0 > socketToClient->writeString("\r\n")) goto last;
  917. if(html && statusCode >= 200 && statusCode < 300 && strcasecmp(method, "HEAD")) {
  918. if(html->length > socketToClient->write(html->bytes, html->length)) goto last;
  919. }
  920. last:
  921. if(buf) free(buf);
  922. if(html) delete html;
  923. return statusCode;
  924. }
  925. DataStorage *BBS2chProxyConnection::html2dat_old(DataStorage *html, int startResNum, time_t *lastModified, bool useCache)
  926. {
  927. char *ptr = html->bytes;
  928. char *end = html->bytes + html->length - 1;
  929. DataStorage *txt = new DataStorage();
  930. int res = startResNum, i=0;
  931. char signature[32];
  932. char title[1024];
  933. int cachedSize = 0;
  934. bool bbspink = strstr(threadKey.c_str(),"bbspink.com") ? true : false;
  935. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<title>", 7);
  936. if(!ptr) {
  937. delete txt;
  938. return NULL;
  939. }
  940. ptr += 7;
  941. while(1) {
  942. if(*ptr == '<') {
  943. if(!strncasecmp(ptr,"</title>",8)) {
  944. ptr += 8;
  945. break;
  946. }
  947. else title[i++] = *ptr++;
  948. }
  949. else title[i++] = *ptr++;
  950. }
  951. title[i] = 0;
  952. snprintf(signature,32,"<dt>%d ",res);
  953. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  954. if(!ptr) {
  955. delete txt;
  956. return NULL;
  957. }
  958. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  959. if(!buffer) {
  960. delete txt;
  961. return NULL;
  962. }
  963. unsigned char *body = buffer;
  964. char *mail = (char *)body + 65536;
  965. char *name = mail + 1024;
  966. char *date = name + 1024;
  967. char *encrypted = date + 1024;
  968. while(ptr < end) {
  969. //fprintf(stderr,"%s\n",signature);
  970. DataStorage *resData = new DataStorage();
  971. i=0;
  972. mail[0] = 0;
  973. ptr = strstr(ptr,signature);
  974. ptr += strlen(signature);
  975. while(*ptr != '<') ptr++;
  976. ptr++;
  977. const char *endStr;
  978. if(*ptr == 'a' || *ptr == 'A') {
  979. replay:
  980. // has mail
  981. while(*ptr != '"') ptr++;
  982. ptr++;
  983. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  984. ptr += 28;
  985. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  986. encrypted[i] = 0;
  987. i = decryptMail((unsigned char *)mail,encrypted);
  988. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  989. ptr -= reconstruct_len;
  990. char *start = ptr;
  991. memcpy(ptr, "<a href=\"mailto:", 16);
  992. ptr += 16;
  993. memcpy(ptr, mail, i);
  994. ptr = start;
  995. i=0;
  996. goto replay;
  997. }
  998. else {
  999. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  1000. while(*ptr != '"') mail[i++] = *ptr++;
  1001. mail[i] = 0;
  1002. }
  1003. endStr = "</a>";
  1004. }
  1005. else if(*ptr == 'b') {
  1006. endStr = NULL;
  1007. }
  1008. else {
  1009. endStr = "</font>";
  1010. }
  1011. if(endStr) {
  1012. ptr = strstr(ptr,"<b>");
  1013. ptr += 3;
  1014. }
  1015. else {
  1016. ptr = strchr(ptr,'>');
  1017. ptr++;
  1018. }
  1019. i=0;
  1020. while(1) {
  1021. if(*ptr == '<') {
  1022. if(!strncasecmp(ptr,"</b>",4) && (!endStr || !strncasecmp(ptr+4,endStr,strlen(endStr)))) {
  1023. ptr += 4;
  1024. if(endStr) ptr += strlen(endStr);
  1025. break;
  1026. }
  1027. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  1028. int j=0;
  1029. ptr = strstr(ptr,"data-cfemail=\"");
  1030. ptr += 14;
  1031. while(*ptr != '"') encrypted[j++] = *ptr++;
  1032. encrypted[j] = 0;
  1033. j = decryptMail((unsigned char *)name+i,encrypted);
  1034. i += j;
  1035. ptr = strstr(ptr,"</script>");
  1036. ptr += 9;
  1037. }
  1038. else name[i++] = *ptr++;
  1039. }
  1040. else name[i++] = *ptr++;
  1041. }
  1042. resData->appendBytes(name, i);
  1043. resData->appendBytes("<>", 2);
  1044. if(mail[0]) resData->appendBytes(mail ,strlen(mail));
  1045. resData->appendBytes("<>", 2);
  1046. ptr += 2;
  1047. i=0;
  1048. while(1) {
  1049. if(*ptr == '<') {
  1050. if(!strncasecmp(ptr,"<dd>",4)) {
  1051. ptr += 4;
  1052. break;
  1053. }
  1054. else if(!strncmp(ptr,"<a href=\"javascript:be(",23)) {
  1055. memcpy(date+i,"BE:",3);
  1056. ptr += 23;
  1057. i += 3;
  1058. while(*ptr != ')') date[i++] = *ptr++;
  1059. date[i++] = '-';
  1060. ptr = strchr(ptr,'?');
  1061. ptr++;
  1062. char *tmp = strstr(ptr,"</a>");
  1063. memcpy(date+i,ptr,tmp-ptr);
  1064. i += tmp-ptr;
  1065. ptr = tmp + 4;
  1066. }
  1067. else date[i++] = *ptr++;
  1068. }
  1069. else date[i++] = *ptr++;
  1070. }
  1071. resData->appendBytes(date ,i);
  1072. resData->appendBytes("<>", 2);
  1073. i=0;
  1074. while(1) {
  1075. if(*ptr == '<') {
  1076. if(!strncasecmp(ptr,"<br><br>\n",9)) {
  1077. ptr += 9;
  1078. break;
  1079. }
  1080. else if(!strncasecmp(ptr,"<dt>",4) || !strncasecmp(ptr,"</dl>",5)) {
  1081. while(i>0 &&body[i-1] == '\n') i--;
  1082. break;
  1083. }
  1084. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  1085. int j=0;
  1086. ptr = strstr(ptr,"data-cfemail=\"");
  1087. ptr += 14;
  1088. while(*ptr != '"') encrypted[j++] = *ptr++;
  1089. encrypted[j] = 0;
  1090. j = decryptMail(body+i,encrypted);
  1091. i += j;
  1092. ptr = strstr(ptr,"</script>");
  1093. ptr += 9;
  1094. }
  1095. else if(!strncmp(ptr,"<a href=\"http",13)) {
  1096. ptr = strchr(ptr,'>');
  1097. ptr++;
  1098. char *link = ptr;
  1099. ptr = strstr(link,"</a>");
  1100. memcpy(body+i,link,ptr-link);
  1101. i += ptr-link;
  1102. ptr += 4;
  1103. }
  1104. else if(!strncmp(ptr,"<img src=\"",10)) {
  1105. ptr += 10;
  1106. char *img = ptr;
  1107. ptr = strstr(img,"\">");
  1108. memcpy(body+i,img,ptr-img);
  1109. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  1110. int length = ptr-img;
  1111. while(*img != '/') {
  1112. img++;
  1113. length--;
  1114. }
  1115. memcpy(body+i,"sssp:",5);
  1116. memcpy(body+i+5,img,length);
  1117. i += length + 5;
  1118. }
  1119. else i += ptr-img;
  1120. ptr += 2;
  1121. }
  1122. else if(!bbspink && !strncmp(ptr,"<br>",4)) {
  1123. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  1124. memcpy(body+i," <br>",5);
  1125. i += 5;
  1126. }
  1127. else {
  1128. memcpy(body+i,"<br>",4);
  1129. i += 4;
  1130. }
  1131. ptr += 4;
  1132. }
  1133. else body[i++] = *ptr++;
  1134. }
  1135. else if(!bbspink && *ptr == ' ') {
  1136. if(*(ptr+1) == ' ') ptr++;
  1137. else body[i++] = *ptr++;
  1138. }
  1139. else body[i++] = *ptr++;
  1140. }
  1141. resData->appendBytes(body ,i);
  1142. resData->appendBytes("<>", 2);
  1143. if(res == 1) resData->appendBytes(title ,strlen(title));
  1144. resData->appendBytes("\n" ,1);
  1145. if(useCache && res == startResNum) {
  1146. PBBS2chProxyThreadInfo info;
  1147. bool hit = false;
  1148. pthread_mutex_lock(mutex);
  1149. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  1150. if(it != threadCache->end()) {
  1151. info = it->second;
  1152. threadCache->erase(it);
  1153. }
  1154. pthread_mutex_unlock(mutex);
  1155. if(info) {
  1156. log_printf(5,"cache hit");
  1157. if(info->cachedData->length == resData->length) {
  1158. log_printf(5,"... size match");
  1159. if(!memcmp(info->cachedData->bytes,resData->bytes,resData->length)) {
  1160. log_printf(5,"... content match");
  1161. hit = true;
  1162. cachedSize = info->cachedSize - resData->length;
  1163. }
  1164. }
  1165. log_printf(5,"\n");
  1166. }
  1167. if(!hit) {
  1168. delete resData;
  1169. free(buffer);
  1170. return NULL;
  1171. }
  1172. }
  1173. txt->appendBytes(resData->bytes, resData->length);
  1174. res++;
  1175. while(*ptr == '\n' || *ptr == '\r') ptr++;
  1176. snprintf(signature,32,"<dt>%d ",res);
  1177. if(!memmem_priv(ptr, end-ptr+1, signature, strlen(signature))) {
  1178. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1179. info->lastResNum = res-1;
  1180. info->cachedSize = txt->length+cachedSize;
  1181. info->cachedData = resData;
  1182. pthread_mutex_lock(mutex);
  1183. threadCache->insert(std::make_pair(threadKey,info));
  1184. pthread_mutex_unlock(mutex);
  1185. log_printf(5,"cached thread %s (%ld bytes)\n",threadKey.c_str(),(long)resData->length);
  1186. if(lastModified) {
  1187. *lastModified = 0;
  1188. char formattedDate[256];
  1189. char *ptr;
  1190. ptr = date;
  1191. int year = strtol(ptr,&ptr,10);
  1192. if(*ptr != '/') break;
  1193. ptr++;
  1194. int month = strtol(ptr,&ptr,10);
  1195. if(*ptr != '/') break;
  1196. ptr++;
  1197. int day = strtol(ptr,&ptr,10);
  1198. if(!*ptr) break;
  1199. while(*ptr != ' ' && *ptr != 0) ptr++;
  1200. if(!*ptr) break;
  1201. ptr++;
  1202. int hour = strtol(ptr,&ptr,10);
  1203. if(*ptr != ':') break;
  1204. ptr++;
  1205. int minutes = strtol(ptr,&ptr,10);
  1206. if(*ptr != ':') break;
  1207. ptr++;
  1208. int seconds = strtol(ptr,&ptr,10);
  1209. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  1210. if(year < 100) year += 2000;
  1211. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  1212. //fprintf(stderr,"%s\n",formattedDate);
  1213. struct tm time = {};
  1214. strptime(formattedDate,threadTimestampFmt,&time);
  1215. *lastModified = mktime(&time);
  1216. //gmtime_r(lastModified,&time);
  1217. //strftime(formattedDate,256,httpTimestampFmt,&time);
  1218. //fprintf(stderr,"%s\n",formattedDate);
  1219. }
  1220. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  1221. break;
  1222. }
  1223. delete resData;
  1224. }
  1225. free(buffer);
  1226. return txt;
  1227. }
  1228. DataStorage *BBS2chProxyConnection::html2dat(DataStorage *html, int startResNum, time_t *lastModified, bool useCache)
  1229. {
  1230. char *ptr = html->bytes;
  1231. char *end = html->bytes + html->length - 1;
  1232. DataStorage *txt = new DataStorage();
  1233. int res = startResNum, i=0;
  1234. char signature[64];
  1235. char title[1024];
  1236. int cachedSize = 0;
  1237. char signatureTag[32];
  1238. char closeTag[32];
  1239. int closeTagLen;
  1240. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<h1 class=\"title\">", 18);
  1241. if(!ptr) {
  1242. delete txt;
  1243. return html2dat_old(html, startResNum, lastModified, useCache);
  1244. }
  1245. else {
  1246. char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, " class=\"post\"", 13);
  1247. if(ptr2) {
  1248. char *tmp = ptr2;
  1249. *ptr2 = 0;
  1250. while(*ptr2 != '<') ptr2--;
  1251. strcpy(signatureTag, ptr2);
  1252. *tmp = ' ';
  1253. }
  1254. else {
  1255. delete txt;
  1256. return NULL;
  1257. }
  1258. /*char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, "<dl class=\"post\"", 16);
  1259. if(ptr2) {
  1260. delete txt;
  1261. return html2dat_pink(html, startResNum, lastModified, useCache);
  1262. }*/
  1263. }
  1264. ptr += 18;
  1265. while(1) {
  1266. if(*ptr == '<') {
  1267. if(!strncasecmp(ptr,"</h1>",5)) {
  1268. ptr += 5;
  1269. break;
  1270. }
  1271. else title[i++] = *ptr++;
  1272. }
  1273. else if(*ptr == '\n') break;
  1274. else title[i++] = *ptr++;
  1275. }
  1276. title[i] = 0;
  1277. snprintf(signature,32,"%s class=\"post\" id=\"%d\"",signatureTag,res);
  1278. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  1279. if(!ptr) {
  1280. delete txt;
  1281. return NULL;
  1282. }
  1283. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  1284. if(!buffer) {
  1285. delete txt;
  1286. return NULL;
  1287. }
  1288. unsigned char *body = buffer;
  1289. char *mail = (char *)body + 65536;
  1290. char *name = mail + 1024;
  1291. char *date = name + 1024;
  1292. char *encrypted = date + 1024;
  1293. while(ptr < end) {
  1294. //fprintf(stderr,"%s\n",signature);
  1295. DataStorage *resData = new DataStorage();
  1296. i=0;
  1297. mail[0] = 0;
  1298. ptr = strstr(ptr," class=\"name\"><b>");
  1299. if(ptr) {
  1300. char *tmp = ptr;
  1301. *ptr = 0;
  1302. while(*ptr != '<') ptr--;
  1303. snprintf(closeTag,32,"</%s>",ptr+1);
  1304. closeTagLen = strlen(closeTag);
  1305. ptr = tmp + 17;
  1306. }
  1307. else {
  1308. delete resData;
  1309. break;
  1310. }
  1311. char endStr[64];
  1312. if(!strncmp(ptr,"<a href=\"mailto:",16)) {
  1313. replay:
  1314. // has mail
  1315. while(*ptr != '"') ptr++;
  1316. ptr++;
  1317. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  1318. ptr += 28;
  1319. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  1320. encrypted[i] = 0;
  1321. i = decryptMail((unsigned char *)mail,encrypted);
  1322. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  1323. ptr -= reconstruct_len;
  1324. char *start = ptr;
  1325. memcpy(ptr, "<a href=\"mailto:", 16);
  1326. ptr += 16;
  1327. memcpy(ptr, mail, i);
  1328. ptr = start;
  1329. i=0;
  1330. goto replay;
  1331. }
  1332. else {
  1333. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  1334. while(1) {
  1335. if(*ptr == '<' && !strncmp(ptr,"<a href=\"",9)) {
  1336. ptr = strchr(ptr,'>');
  1337. ptr++;
  1338. char *link = ptr;
  1339. ptr = strstr(link,"</a>");
  1340. memcpy(mail+i,link,ptr-link);
  1341. i += ptr-link;
  1342. ptr += 4;
  1343. }
  1344. else if(*ptr == '"') break;
  1345. else mail[i++] = *ptr++;
  1346. }
  1347. //while(*ptr != '"') mail[i++] = *ptr++;
  1348. mail[i] = 0;
  1349. }
  1350. snprintf(endStr,64,"</a></b>%s",closeTag);
  1351. while(*ptr != '>') ptr++;
  1352. ptr++;
  1353. }
  1354. /* we do not have to handle this special case because read.cgi on bbspink doesn't
  1355. emit font tags anymore and it conflicts with text decorations using "melon point" */
  1356. /*else if(!strncmp(ptr,"<font",5)) {
  1357. snprintf(endStr,64,"</font></b>%s",closeTag);
  1358. while(*ptr != '>') ptr++;
  1359. ptr++;
  1360. }*/
  1361. else {
  1362. snprintf(endStr,64,"</b>%s",closeTag);
  1363. }
  1364. i=0;
  1365. while(1) {
  1366. if(*ptr == '<') {
  1367. if(!strncmp(ptr,endStr,strlen(endStr))) {
  1368. ptr += strlen(endStr);
  1369. break;
  1370. }
  1371. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  1372. int j=0;
  1373. ptr = strstr(ptr,"data-cfemail=\"");
  1374. ptr += 14;
  1375. while(*ptr != '"') encrypted[j++] = *ptr++;
  1376. encrypted[j] = 0;
  1377. j = decryptMail((unsigned char *)name+i,encrypted);
  1378. i += j;
  1379. ptr = strstr(ptr,"</script>");
  1380. ptr += 9;
  1381. }
  1382. else if(!strncmp(ptr,"<a href=\"",9)) {
  1383. ptr = strchr(ptr,'>');
  1384. ptr++;
  1385. char *link = ptr;
  1386. ptr = strstr(link,"</a>");
  1387. memcpy(name+i,link,ptr-link);
  1388. i += ptr-link;
  1389. ptr += 4;
  1390. }
  1391. else name[i++] = *ptr++;
  1392. }
  1393. else name[i++] = *ptr++;
  1394. }
  1395. resData->appendBytes(name, i);
  1396. resData->appendBytes("<>", 2);
  1397. if(mail[0]) resData->appendBytes(mail ,strlen(mail));
  1398. resData->appendBytes("<>", 2);
  1399. ptr = strstr(ptr," class=\"date\">");
  1400. if(ptr) {
  1401. char *tmp = ptr;
  1402. *ptr = 0;
  1403. while(*ptr != '<') ptr--;
  1404. snprintf(closeTag,32,"</%s>",ptr+1);
  1405. closeTagLen = strlen(closeTag);
  1406. ptr = tmp + 14;
  1407. }
  1408. else {
  1409. delete resData;
  1410. break;
  1411. }
  1412. i=0;
  1413. while(1) {
  1414. if(*ptr == '<') {
  1415. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1416. ptr += closeTagLen;
  1417. break;
  1418. }
  1419. else date[i++] = *ptr++;
  1420. }
  1421. else date[i++] = *ptr++;
  1422. }
  1423. if(!strncmp(ptr,"<div class=\"uid",15) || !strncmp(ptr,"<span class=\"uid",16)) {
  1424. char *tmp = ptr+1;
  1425. while(*ptr != ' ') ptr++;
  1426. *ptr = 0;
  1427. snprintf(closeTag,32,"</%s>",tmp);
  1428. closeTagLen = strlen(closeTag);
  1429. ptr += 11;
  1430. while(*ptr != '>') ptr++;
  1431. ptr++;
  1432. date[i++] = ' ';
  1433. while(1) {
  1434. if(*ptr == '<') {
  1435. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1436. ptr += closeTagLen;
  1437. break;
  1438. }
  1439. else date[i++] = *ptr++;
  1440. }
  1441. else date[i++] = *ptr++;
  1442. }
  1443. }
  1444. if(!strncmp(ptr,"<div class=\"be",14) || !strncmp(ptr,"<span class=\"be",15)) {
  1445. ptr += 14;
  1446. while(*ptr != '>') ptr++;
  1447. ptr++;
  1448. if(!strncmp(ptr,"<a href=\"",9)) {
  1449. ptr += 9;
  1450. while(*ptr != '/' && *ptr != '"') ptr++;
  1451. if(*ptr == '/' && (!strncmp(ptr,"//be.2ch.net/user/",18) || !strncmp(ptr,"//be.5ch.net/user/",18))) {
  1452. memcpy(date+i," BE:",4);
  1453. i += 4;
  1454. ptr += 18;
  1455. while(*ptr != '"') date[i++] = *ptr++;
  1456. date[i++] = '-';
  1457. ptr = strchr(ptr,'?');
  1458. ptr++;
  1459. char *tmp = strstr(ptr,"</a>");
  1460. memcpy(date+i,ptr,tmp-ptr);
  1461. i += tmp-ptr;
  1462. ptr = tmp + 4;
  1463. }
  1464. }
  1465. }
  1466. resData->appendBytes(date ,i);
  1467. resData->appendBytes("<>", 2);
  1468. if(!strcmp(signatureTag,"<div")) {
  1469. ptr = strstr(ptr,"<div class=\"message\">");
  1470. if(!ptr) {
  1471. delete resData;
  1472. break;
  1473. }
  1474. else {
  1475. ptr += 21;
  1476. if(!strncasecmp(ptr,"<span class=\"escaped\">",22)) {
  1477. if(!strncasecmp(ptr+22,"<span class=\"AA\">",17)) {
  1478. strcpy(closeTag,"</span></span></div>");
  1479. closeTagLen = 20;
  1480. ptr += 22+17;
  1481. }
  1482. else {
  1483. strcpy(closeTag,"</span></div>");
  1484. closeTagLen = 13;
  1485. ptr += 22;
  1486. }
  1487. }
  1488. else {
  1489. strcpy(closeTag,"</div>");
  1490. closeTagLen = 6;
  1491. }
  1492. }
  1493. }
  1494. else {
  1495. ptr = strstr(ptr,"<dd class=\"thread_in\">");
  1496. if(!ptr) {
  1497. delete resData;
  1498. break;
  1499. }
  1500. strcpy(closeTag,"</dd>");
  1501. closeTagLen = 5;
  1502. ptr += 22;
  1503. }
  1504. i=0;
  1505. while(1) {
  1506. if(*ptr == '<') {
  1507. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1508. ptr += closeTagLen;
  1509. break;
  1510. }
  1511. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  1512. int j=0;
  1513. ptr = strstr(ptr,"data-cfemail=\"");
  1514. ptr += 14;
  1515. while(*ptr != '"') encrypted[j++] = *ptr++;
  1516. encrypted[j] = 0;
  1517. j = decryptMail(body+i,encrypted);
  1518. i += j;
  1519. ptr = strstr(ptr,"</script>");
  1520. ptr += 9;
  1521. }
  1522. else if(!strncmp(ptr,"<a ",3)) {
  1523. char *tmp = strchr(ptr,'>');
  1524. char *href = (char *)memmem_priv(ptr,tmp-ptr,"href=\"",6);
  1525. char *link = tmp+1;
  1526. if(href && !strncmp(link,"&gt;&gt;",8) && memmem_priv(href,link-href,"test/read.cgi/",14)) {
  1527. while(ptr < link) {
  1528. if(!strncmp(ptr," class=\"",8)) {
  1529. ptr += 8;
  1530. while(*ptr != '"' && *ptr != '>') ptr++;
  1531. if(*ptr == '"') ptr++;
  1532. }
  1533. else body[i++] = *ptr++;
  1534. }
  1535. }
  1536. else {
  1537. ptr = strstr(link,"</a>");
  1538. memcpy(body+i,link,ptr-link);
  1539. i += ptr-link;
  1540. ptr += 4;
  1541. }
  1542. }
  1543. else if(!strncmp(ptr,"<img src=\"",10)) {
  1544. ptr += 10;
  1545. char *img = ptr;
  1546. ptr = strstr(img,"\">");
  1547. memcpy(body+i,img,ptr-img);
  1548. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  1549. int length = ptr-img;
  1550. while(*img != '/') {
  1551. img++;
  1552. length--;
  1553. }
  1554. memcpy(body+i,"sssp:",5);
  1555. memcpy(body+i+5,img,length);
  1556. i += length + 5;
  1557. }
  1558. else i += ptr-img;
  1559. ptr += 2;
  1560. }
  1561. else if(!strncmp(ptr,"<br>",4)) {
  1562. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  1563. memcpy(body+i," <br>",5);
  1564. i += 5;
  1565. }
  1566. else {
  1567. memcpy(body+i,"<br>",4);
  1568. i += 4;
  1569. }
  1570. ptr += 4;
  1571. }
  1572. else body[i++] = *ptr++;
  1573. }
  1574. else body[i++] = *ptr++;
  1575. }
  1576. resData->appendBytes(body ,i);
  1577. resData->appendBytes("<>", 2);
  1578. if(res == 1) resData->appendBytes(title ,strlen(title));
  1579. resData->appendBytes("\n" ,1);
  1580. if(useCache && res == startResNum) {
  1581. PBBS2chProxyThreadInfo info;
  1582. bool hit = false;
  1583. pthread_mutex_lock(mutex);
  1584. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  1585. if(it != threadCache->end()) {
  1586. info = it->second;
  1587. threadCache->erase(it);
  1588. }
  1589. pthread_mutex_unlock(mutex);
  1590. if(info) {
  1591. log_printf(5,"cache hit");
  1592. if(info->cachedData->length == resData->length) {
  1593. log_printf(5,"... size match");
  1594. if(!memcmp(info->cachedData->bytes,resData->bytes,resData->length)) {
  1595. log_printf(5,"... content match");
  1596. hit = true;
  1597. cachedSize = info->cachedSize - resData->length;
  1598. }
  1599. }
  1600. log_printf(5,"\n");
  1601. }
  1602. if(!hit) {
  1603. delete resData;
  1604. free(buffer);
  1605. return NULL;
  1606. }
  1607. }
  1608. txt->appendBytes(resData->bytes, resData->length);
  1609. res++;
  1610. while(*ptr == '\n' || *ptr == '\r') ptr++;
  1611. snprintf(signature,64,"%s class=\"post\" id=\"",signatureTag);
  1612. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  1613. if(ptr) {
  1614. int next = atoi(ptr+strlen(signature));
  1615. if(next >= res) {
  1616. while(next > res) {
  1617. txt->appendBytes("broken<><>broken<> broken <>\n", 29);
  1618. res++;
  1619. }
  1620. }
  1621. else ptr = NULL;
  1622. }
  1623. if(!ptr) {
  1624. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1625. info->lastResNum = res-1;
  1626. info->cachedSize = txt->length+cachedSize;
  1627. info->cachedData = resData;
  1628. pthread_mutex_lock(mutex);
  1629. threadCache->insert(std::make_pair(threadKey,info));
  1630. pthread_mutex_unlock(mutex);
  1631. log_printf(5,"cached thread %s (%ld bytes)\n",threadKey.c_str(),(long)resData->length);
  1632. if(lastModified) {
  1633. *lastModified = 0;
  1634. char formattedDate[256];
  1635. char *ptr;
  1636. ptr = date;
  1637. int year = strtol(ptr,&ptr,10);
  1638. if(*ptr != '/') break;
  1639. ptr++;
  1640. int month = strtol(ptr,&ptr,10);
  1641. if(*ptr != '/') break;
  1642. ptr++;
  1643. int day = strtol(ptr,&ptr,10);
  1644. if(!*ptr) break;
  1645. while(*ptr != ' ' && *ptr != 0) ptr++;
  1646. if(!*ptr) break;
  1647. ptr++;
  1648. int hour = strtol(ptr,&ptr,10);
  1649. if(*ptr != ':') break;
  1650. ptr++;
  1651. int minutes = strtol(ptr,&ptr,10);
  1652. if(*ptr != ':') break;
  1653. ptr++;
  1654. int seconds = strtol(ptr,&ptr,10);
  1655. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  1656. if(year < 100) year += 2000;
  1657. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  1658. //fprintf(stderr,"%s\n",formattedDate);
  1659. struct tm time = {};
  1660. strptime(formattedDate,threadTimestampFmt,&time);
  1661. *lastModified = mktime(&time);
  1662. //gmtime_r(lastModified,&time);
  1663. //strftime(formattedDate,256,httpTimestampFmt,&time);
  1664. //fprintf(stderr,"%s\n",formattedDate);
  1665. }
  1666. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  1667. break;
  1668. }
  1669. delete resData;
  1670. }
  1671. free(buffer);
  1672. return txt;
  1673. }
  1674. int BBS2chProxyConnection::datProxyAPI(const char *url, const char *method, BBS2chProxyHttpHeaders &requestHeaders)
  1675. {
  1676. long statusCode = 0;
  1677. const std::string &postBody = auth.requestBodyForURL(url, curl);
  1678. bool directMode = false;
  1679. if (postBody.empty()) {
  1680. sendResponse(401, "Unauthorized", socketToClient);
  1681. return 401;
  1682. }
  1683. /* just read and strip off post body */
  1684. if (!strcasecmp(method, "POST")) {
  1685. char *postdata = NULL;
  1686. if (isClientChunked) {
  1687. readChunkedBodyIntoBuffer(&postdata, socketToClient);
  1688. }
  1689. else if (content_length) {
  1690. postdata = (char *)calloc(content_length+1, 1);
  1691. socketToClient->read(postdata, content_length);
  1692. }
  1693. if (postdata && strstr(postdata, "sid=")) directMode = true;
  1694. if (postdata) free(postdata);
  1695. }
  1696. if (curl) {
  1697. CURLcode res;
  1698. struct curl_slist *headersForCurl = NULL;
  1699. DataStorage receivedHeader;
  1700. DataStorage receivedBody;
  1701. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl, "Range");
  1702. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl, "If-Modified-Since");
  1703. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl, "Accept-Encoding");
  1704. if (x_2ch_ua_dat) headersForCurl = curl_slist_append(headersForCurl, x_2ch_ua_dat);
  1705. if (curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  1706. curl_easy_setopt(curl, CURLOPT_URL, url);
  1707. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headersForCurl);
  1708. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  1709. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  1710. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  1711. curl_easy_setopt(curl, CURLOPT_WRITEDATA, &receivedBody);
  1712. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_download);
  1713. curl_easy_setopt(curl, CURLOPT_HEADERDATA, &receivedHeader);
  1714. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  1715. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  1716. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  1717. if (force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  1718. if (proxy_server) {
  1719. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  1720. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  1721. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  1722. }
  1723. if (api_ua_dat) {
  1724. curl_easy_setopt(curl, CURLOPT_USERAGENT, api_ua_dat);
  1725. }
  1726. else {
  1727. if (user_agent && !strncmp(user_agent, "Monazilla/", strlen("Monazilla/")))
  1728. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  1729. else
  1730. curl_easy_setopt(curl, CURLOPT_USERAGENT, "");
  1731. }
  1732. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  1733. #if LIBCURL_VERSION_NUM >= 0x071101
  1734. curl_easy_setopt(curl, CURLOPT_COPYPOSTFIELDS, postBody.c_str());
  1735. #else
  1736. curl_easy_setopt(curl, CURLOPT_POSTFIELDS, postBody.c_str());
  1737. #endif
  1738. //return;
  1739. res = curl_easy_perform(curl);
  1740. if (res == CURLE_OK) {
  1741. curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode);
  1742. receivedHeader.appendBytes("", 1);
  1743. const char *ptr = receivedHeader.bytes;
  1744. /* this is necessary because the raw header may contain chunk trailers after real headers */
  1745. const char *end = strstr(receivedHeader.bytes, "\r\n\r\n");
  1746. int threadStatus = 0;
  1747. if (end && !directMode) {
  1748. BBS2chProxyHttpHeaders headers;
  1749. while (ptr < end) {
  1750. const char *lineEnd = strchr(ptr, '\n');
  1751. if (!lineEnd) break;
  1752. headers.add(ptr, lineEnd-ptr);
  1753. ptr = lineEnd + 1;
  1754. }
  1755. if (headers.has("Thread-Status")) {
  1756. threadStatus = atoi(headers.get("Thread-Status").c_str());
  1757. }
  1758. }
  1759. if (threadStatus == 1 || (directMode && end)) {
  1760. if (end+4-receivedHeader.bytes > socketToClient->write(receivedHeader.bytes, end+4-receivedHeader.bytes)) goto last;
  1761. if (receivedBody.length > socketToClient->write(receivedBody.bytes, receivedBody.length)) goto last;
  1762. goto last;
  1763. }
  1764. else if (threadStatus == 8) {
  1765. sendBasicHeaders(302, "Found", socketToClient);
  1766. if (0 >= socketToClient->writeString("Location: http://www2.2ch.net/live.html\r\n")) goto last;
  1767. if (0 >= socketToClient->writeString("\r\n")) goto last;
  1768. statusCode = 302;
  1769. goto last;
  1770. }
  1771. else {
  1772. if (statusCode < 400) {
  1773. sendResponse(401, "Unauthorized", socketToClient);
  1774. statusCode = 401;
  1775. }
  1776. else {
  1777. sendResponse(503, "Service Unavailable", socketToClient);
  1778. statusCode = 503;
  1779. }
  1780. receivedBody.appendBytes("",1);
  1781. if (!strncasecmp(receivedBody.bytes,"ng (",4)) {
  1782. log_printf(0, "API gateway returned error: %s\n", receivedBody.bytes);
  1783. }
  1784. }
  1785. //fprintf(stderr,"%ld\n",statusCode);
  1786. }
  1787. else {
  1788. log_printf(0, "curl error: %s\n", curl_easy_strerror(res));
  1789. sendResponse(503, "Service Unavailable", socketToClient);
  1790. statusCode = 503;
  1791. }
  1792. last:
  1793. curl_easy_reset(curl);
  1794. curl_slist_free_all(headersForCurl);
  1795. }
  1796. return statusCode;
  1797. }
  1798. int BBS2chProxyConnection::bbsmenuProxy(const char *url, const char *method, BBS2chProxyHttpHeaders &requestHeaders)
  1799. {
  1800. long statusCode = 0;
  1801. DataStorage *dat = new DataStorage();
  1802. DataStorage *outHTML = new DataStorage();
  1803. if(curl) {
  1804. CURLcode res;
  1805. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  1806. curl_easy_setopt(curl, CURLOPT_URL, url);
  1807. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  1808. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  1809. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  1810. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  1811. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  1812. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  1813. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  1814. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  1815. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  1816. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  1817. if(proxy_server) {
  1818. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  1819. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  1820. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  1821. }
  1822. if(user_agent) {
  1823. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  1824. }
  1825. else if(requestHeaders.has("User-Agent")) {
  1826. curl_easy_setopt(curl, CURLOPT_USERAGENT, requestHeaders.get("User-Agent").c_str());
  1827. }
  1828. res = curl_easy_perform(curl);
  1829. if(res == CURLE_OK) {
  1830. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  1831. if(statusCode == 200 && dat->length) {
  1832. dat->appendBytes("",1);
  1833. dat->length--;
  1834. char *ptr = dat->bytes;
  1835. while(*ptr) {
  1836. if(!strncasecmp(ptr,"<a href=",8)) {
  1837. char *start = ptr;
  1838. char *end = strchr(ptr+8,'>');
  1839. ptr = strstr(ptr+8,"://");
  1840. if(ptr && ptr < end) {
  1841. char *protocol = ptr;
  1842. char *end2 = strchr(ptr+3,'/');
  1843. ptr = strstr(protocol+3,"5ch.net");
  1844. if(ptr && ptr < end2 && (*(ptr-1)=='.' || *(ptr-1)=='/')) {
  1845. memcpy(ptr,"2ch",3);
  1846. if(*(protocol-1) == 's') outHTML->appendBytes(start, protocol-start-1);
  1847. else outHTML->appendBytes(start, protocol-start);
  1848. outHTML->appendBytes(protocol, end-protocol);
  1849. ptr = end;
  1850. continue;
  1851. }
  1852. ptr = strstr(protocol+3,"bbspink.com");
  1853. if(ptr && ptr < end2 && (*(ptr-1)=='.' || *(ptr-1)=='/')) {
  1854. if(*(protocol-1) == 's') outHTML->appendBytes(start, protocol-start-1);
  1855. else outHTML->appendBytes(start, protocol-start);
  1856. outHTML->appendBytes(protocol, end-protocol);
  1857. ptr = end;
  1858. continue;
  1859. }
  1860. }
  1861. ptr = start;
  1862. }
  1863. outHTML->appendBytes(ptr++, 1);
  1864. }
  1865. }
  1866. }
  1867. else {
  1868. log_printf(0,"curl error: %s (%s)\n", curl_easy_strerror(res), url);
  1869. statusCode = 503;
  1870. }
  1871. }
  1872. if(statusCode == 200) {
  1873. std::ostringstream ss;
  1874. ss << "Content-Length: " << outHTML->length << "\r\n";
  1875. sendBasicHeaders(statusCode,"OK",socketToClient);
  1876. if(0 >= socketToClient->writeString("Content-Type: text/html\r\n")) goto last;
  1877. if(0 >= socketToClient->writeString(ss.str())) goto last;
  1878. if(0 >= socketToClient->writeString("\r\n")) goto last;
  1879. if(strcasecmp(method, "HEAD")) {
  1880. if(outHTML->length > socketToClient->write(outHTML->bytes, outHTML->length)) goto last;
  1881. }
  1882. }
  1883. else {
  1884. sendResponse(503, "Service Unavailable", socketToClient);
  1885. statusCode = 503;
  1886. }
  1887. last:
  1888. if(curl) curl_easy_reset(curl);
  1889. if(dat) delete dat;
  1890. if(outHTML) delete outHTML;
  1891. return statusCode;
  1892. }
  1893. int BBS2chProxyConnection::bbsCgiProxy(const char *url, BBS2chProxyHttpHeaders &requestHeaders, const char *requestBody)
  1894. {
  1895. long statusCode = 0;
  1896. std::string hostStr = requestHeaders.get("Host");
  1897. std::string boardStr;
  1898. std::string threadStr;
  1899. requestHeaders.remove("Host");
  1900. if (user_agent) requestHeaders.set("User-Agent", user_agent);
  1901. if (requestBody && (lua_script || !bbscgi_headers.empty() || !bbscgi_postorder.empty())) {
  1902. std::map<std::string, std::string> fields;
  1903. const char *ptr = requestBody;
  1904. size_t bodyLength = 0;
  1905. while (1) {
  1906. const char *tmp = ptr;
  1907. while (*tmp != '=' && *tmp != 0) tmp++;
  1908. if (*tmp == 0) {
  1909. bodyLength = tmp - requestBody;
  1910. break;
  1911. }
  1912. std::string key(ptr, tmp-ptr);
  1913. tmp++;
  1914. ptr = tmp;
  1915. while (*tmp != '&' && *tmp != 0) tmp++;
  1916. std::string value(ptr, tmp-ptr);
  1917. fields.insert(std::make_pair(key, value));
  1918. if (*tmp == 0) {
  1919. bodyLength = tmp - requestBody;
  1920. break;
  1921. }
  1922. ptr = tmp + 1;
  1923. }
  1924. std::map<std::string, std::string>::iterator it;
  1925. if (it = fields.find("bbs"), it != fields.end()) boardStr = it->second;
  1926. if (it = fields.find("key"), it != fields.end()) threadStr = it->second;
  1927. if (!bbscgi_postorder.empty()) {
  1928. std::string newBody;
  1929. for (std::vector<std::string>::iterator it2 = bbscgi_postorder.begin(); it2 != bbscgi_postorder.end(); it2++) {
  1930. const std::string &name = *it2;
  1931. if (it = fields.find(name), it != fields.end()) {
  1932. if (!newBody.empty()) newBody.append("&");
  1933. newBody.append(name);
  1934. newBody.append("=");
  1935. newBody.append(it->second);
  1936. fields.erase(name);
  1937. }
  1938. }
  1939. for (it = fields.begin(); it != fields.end(); it++) {
  1940. if (!newBody.empty()) newBody.append("&");
  1941. newBody.append(it->first);
  1942. newBody.append("=");
  1943. newBody.append(it->second);
  1944. }
  1945. if (bodyLength == newBody.length()) {
  1946. strcpy((char *)requestBody, newBody.c_str());
  1947. log_printf(1, "Reordered request body is: %s\n", requestBody);
  1948. }
  1949. else {
  1950. log_printf(0, "Error occured while reordering the request body - skipping\n");
  1951. }
  1952. }
  1953. }
  1954. if (!bbscgi_headers.empty()) {
  1955. for (std::map<std::string, std::string>::iterator it = bbscgi_headers.begin(); it!=bbscgi_headers.end(); it++) {
  1956. /* we cannot use a reference here, because the original string shouldn't be replaced */
  1957. std::string value = it->second;
  1958. if (!hostStr.empty()) {
  1959. std::string::size_type pos = value.find("%HOST%");
  1960. while (pos != std::string::npos) {
  1961. value.replace(pos, 6, hostStr);
  1962. pos = value.find("%HOST%", pos+hostStr.length());
  1963. }
  1964. }
  1965. if (!boardStr.empty()) {
  1966. std::string::size_type pos = value.find("%BOARD%");
  1967. while (pos != std::string::npos) {
  1968. value.replace(pos, 7, boardStr);
  1969. pos = value.find("%BOARD%", pos+boardStr.length());
  1970. }
  1971. }
  1972. if (!threadStr.empty()) {
  1973. std::string::size_type pos = value.find("%THREAD%");
  1974. while (pos != std::string::npos) {
  1975. value.replace(pos, 8, threadStr);
  1976. pos = value.find("%THREAD%", pos+threadStr.length());
  1977. }
  1978. }
  1979. requestHeaders.set(it->first, value);
  1980. log_printf(1, "Appended custom header \"%s: %s\"\n", it->first.c_str(), value.c_str());
  1981. }
  1982. }
  1983. for (int run=0; run<2; run++) {
  1984. BBS2chProxyHttpHeaders *_headers = new BBS2chProxyHttpHeaders(requestHeaders);
  1985. curl_slist *headersForCurl = NULL;
  1986. char *_body = (char *)requestBody;
  1987. std::string nic;
  1988. long verbose = 0;
  1989. status = 0;
  1990. monaKeyForRequest = "";
  1991. #ifdef USE_LUA
  1992. if (lua_script) {
  1993. lua_State* l = luaL_newstate();
  1994. luaL_openlibs(l);
  1995. if (luaL_loadfile(l, lua_script) != LUA_OK) {
  1996. log_printf(0, "Lua: Failed to open script %s:\n %s\n", lua_script, lua_tostring(l, -1));
  1997. goto lua_end;
  1998. }
  1999. lua_newtable(l);
  2000. lua_pushcfunction(l, lua_hmacSHA256);
  2001. lua_setfield(l, -2, "hmacSHA256");
  2002. lua_pushcfunction(l, lua_decodeURIComponent);
  2003. lua_setfield(l, -2, "decodeURIComponent");
  2004. lua_pushcfunction(l, lua_encodeURIComponent);
  2005. lua_setfield(l, -2, "encodeURIComponent");
  2006. lua_pushcfunction(l, lua_convertShiftJISToUTF8);
  2007. lua_setfield(l, -2, "convertShiftJISToUTF8");
  2008. lua_pushcfunction(l, lua_isExpiredKey);
  2009. lua_setfield(l, -2, "isExpiredKey");
  2010. lua_pushcfunction(l, lua_isValidAsUTF8);
  2011. lua_setfield(l, -2, "isValidAsUTF8");
  2012. lua_pushcfunction(l, lua_getMonaKey);
  2013. lua_setfield(l, -2, "getMonaKey");
  2014. lua_pushstring(l, keyManager.getKey().c_str());
  2015. lua_setfield(l, -2, "monaKey");
  2016. lua_setglobal(l, "proxy2ch");
  2017. BBS2chProxyHttpHeaders::getClassDefinitionForLua(l);
  2018. lua_setglobal(l, "HttpHeaders");
  2019. if (lua_pcall(l, 0, 0, 0) != LUA_OK) {
  2020. log_printf(0, "Lua: Failed to run script %s:\n %s\n", lua_script, lua_tostring(l, -1));
  2021. goto lua_end;
  2022. }
  2023. lua_getglobal(l, "willSendRequestToBbsCgi");
  2024. if (!lua_isfunction(l, -1)) {
  2025. log_printf(0, "Lua: willSendRequestToBbsCgi function does not exist in the script\n");
  2026. goto lua_end;
  2027. }
  2028. lua_newtable(l);
  2029. _headers->getUserdataForLua(l);
  2030. lua_setfield(l, -2, "headers");
  2031. lua_pushstring(l, _body);
  2032. lua_setfield(l, -2, "body");
  2033. lua_pushstring(l, hostStr.c_str());
  2034. lua_pushstring(l, boardStr.c_str());
  2035. lua_pushstring(l, threadStr.c_str());
  2036. if (lua_pcall(l, 4, 1, 0) != LUA_OK) {
  2037. log_printf(0, "Lua: Failed to call willSendRequestToBbsCgi function:\n %s\n", lua_tostring(l, -1));
  2038. goto lua_end;
  2039. }
  2040. if (!lua_istable(l, -1)) {
  2041. log_printf(0, "Lua: A return type of willSendRequestToBbsCgi function should be a table\n");
  2042. goto lua_end;
  2043. }
  2044. lua_pushstring(l, "body");
  2045. lua_rawget(l, -2);
  2046. if (lua_isstring(l, -1)) {
  2047. const char *newBody = lua_tostring(l, -1);
  2048. _body = strdup(newBody);
  2049. log_printf(1, "Lua: Set request body \"%s\"\n", newBody);
  2050. }
  2051. lua_pop(l, 1);
  2052. lua_pushstring(l, "headers");
  2053. lua_rawget(l, -2);
  2054. if (lua_istable(l, -1)) {
  2055. delete _headers;
  2056. _headers = new BBS2chProxyHttpHeaders();
  2057. lua_pushnil(l);
  2058. while (lua_next(l, -2)) {
  2059. if (lua_isstring(l, -1) && lua_isstring(l, -2)) {
  2060. const char *name = lua_tostring(l, -2);
  2061. const char *value = lua_tostring(l, -1);
  2062. _headers->add(name, value);
  2063. log_printf(1, "Lua: Set request header \"%s: %s\"\n", name, value);
  2064. }
  2065. lua_pop(l, 1);
  2066. }
  2067. }
  2068. else if (lua_isuserdata(l, -1)) {
  2069. if (lua_getmetatable(l, -1)) {
  2070. if (lua_getfield(l, -1, "_type") == LUA_TSTRING) {
  2071. if (!strcmp(lua_tostring(l, -1), "HttpHeaders")) {
  2072. BBS2chProxyHttpHeaders *newHeaders = *((BBS2chProxyHttpHeaders **)lua_touserdata(l, -3));
  2073. if (newHeaders != _headers) {
  2074. /* remove metatable to prevent the object from garbage collected by lua */
  2075. lua_newtable(l);
  2076. lua_setmetatable(l, -4);
  2077. delete _headers;
  2078. _headers = newHeaders;
  2079. }
  2080. for (std::map<std::string, PBBS2chProxyHttpHeaderEntry>::iterator it = _headers->getMap().begin(); it != _headers->getMap().end(); it++) {
  2081. log_printf(1, "Lua: Set request header \"%s\"\n", it->second->getFull().c_str());
  2082. }
  2083. }
  2084. }
  2085. lua_pop(l, 2);
  2086. }
  2087. }
  2088. lua_pop(l, 1);
  2089. lua_pushstring(l, "options");
  2090. lua_rawget(l, -2);
  2091. if (lua_istable(l, -1)) {
  2092. lua_pushstring(l, "interface");
  2093. lua_rawget(l, -2);
  2094. if (lua_isstring(l, -1)) {
  2095. nic = std::string(lua_tostring(l, -1));
  2096. }
  2097. lua_pop(l, 1);
  2098. lua_pushstring(l, "verbose");
  2099. lua_rawget(l, -2);
  2100. if (lua_isboolean(l, -1)) {
  2101. verbose = lua_toboolean(l, -1);
  2102. }
  2103. lua_pop(l, 1);
  2104. }
  2105. lua_end:
  2106. lua_close(l);
  2107. }
  2108. #endif
  2109. do {
  2110. bool isPink = hostStr.find("bbspink.com") != std::string::npos;
  2111. bool shouldSign = appKey && (((api_mode & 2) && !isPink) || (api_mode & 4));
  2112. bool shouldConvertBodyToUTF8 = (bbscgi_utf8 == 1 && shouldSign) || (bbscgi_utf8 == 2);
  2113. userAgentForRequest = _headers->get("User-Agent");
  2114. if (userAgentForRequest.empty() && user_agent) userAgentForRequest = user_agent;
  2115. if (_headers->has("X-MonaKey")) {
  2116. monaKeyForRequest = _headers->get("X-MonaKey");
  2117. }
  2118. if (shouldConvertBodyToUTF8 && !_headers->has("X-PostSig")) {
  2119. std::string newBody = convertBodyToUTF8(_body);
  2120. if (!newBody.empty()) {
  2121. if (_body != requestBody) {
  2122. free(_body);
  2123. }
  2124. _body = strdup(newBody.c_str());
  2125. log_printf(1, "Converted request body to UTF-8: %s\n", _body);
  2126. }
  2127. else {
  2128. log_printf(1, "Request body seems already to be UTF-8, will be sent without conversion\n");
  2129. }
  2130. _headers->set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
  2131. log_printf(1, "Appended header \"Content-Type: application/x-www-form-urlencoded; charset=UTF-8\"\n");
  2132. }
  2133. if (shouldSign && (!lua_script || !_headers->has("X-PostSig"))) {
  2134. if (!userAgentForRequest.empty()) {
  2135. monaKeyForRequest = keyManager.getKey(userAgentForRequest);
  2136. appendPostSignature(_body, userAgentForRequest, monaKeyForRequest, _headers);
  2137. } else {
  2138. log_printf(0, "API: User-Agent muse be set explicitly to post with API.\n");
  2139. }
  2140. }
  2141. if (!monaKeyForRequest.empty()) {
  2142. double wait = keyManager.secondsToWaitBeforePosting(monaKeyForRequest);
  2143. if (wait > 0) {
  2144. log_printf(1, "Sleeping for %.1f seconds to avoid posting too fast...\n", wait);
  2145. #ifdef _WIN32
  2146. Sleep(wait * 1e+3);
  2147. #else
  2148. usleep(wait * 1e+6);
  2149. #endif
  2150. }
  2151. }
  2152. headersForCurl = _headers->appendToCurlSlist(headersForCurl);
  2153. if (!_headers->has("Expect")) headersForCurl = curl_slist_append(headersForCurl, "Expect:");
  2154. if (!_headers->has("Accept")) headersForCurl = curl_slist_append(headersForCurl, "Accept:");
  2155. } while (0);
  2156. if (curl) {
  2157. CURLcode res;
  2158. if (curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  2159. curl_easy_setopt(curl, CURLOPT_URL, url);
  2160. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  2161. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  2162. if (run == 0)
  2163. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_bbscgi);
  2164. else
  2165. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_proxy);
  2166. curl_easy_setopt(curl, CURLOPT_HEADERDATA, this);
  2167. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_proxy);
  2168. curl_easy_setopt(curl, CURLOPT_WRITEDATA, this);
  2169. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  2170. curl_easy_setopt(curl, CURLOPT_POSTFIELDS, _body);
  2171. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  2172. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  2173. curl_easy_setopt(curl, CURLOPT_VERBOSE, verbose);
  2174. if (force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  2175. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  2176. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headersForCurl);
  2177. if (!nic.empty()) curl_easy_setopt(curl, CURLOPT_INTERFACE, nic.c_str());
  2178. if (user_agent) {
  2179. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  2180. }
  2181. if (proxy_server) {
  2182. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  2183. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  2184. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  2185. }
  2186. res = curl_easy_perform(curl);
  2187. if (res != CURLE_OK) {
  2188. if (res == CURLE_WRITE_ERROR && status == 2) {
  2189. log_printf(1, "MonaKey should be reset. Sending the same request automatically...\n");
  2190. curl_easy_reset(curl);
  2191. curl_slist_free_all(headersForCurl);
  2192. delete _headers;
  2193. if (_body != requestBody) free(_body);
  2194. continue;
  2195. }
  2196. else {
  2197. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),url);
  2198. if (!status) sendResponse(503, "Service Unavailable", socketToClient);
  2199. statusCode = 503;
  2200. }
  2201. }
  2202. else {
  2203. if (isResponseChunked) {
  2204. socketToClient->writeString("0\r\n\r\n");
  2205. }
  2206. curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode);
  2207. }
  2208. curl_easy_reset(curl);
  2209. }
  2210. curl_slist_free_all(headersForCurl);
  2211. delete _headers;
  2212. if (_body != requestBody) free(_body);
  2213. break;
  2214. }
  2215. return statusCode;
  2216. }
  2217. void BBS2chProxyConnection::compileRegex(void)
  2218. {
  2219. static int compiled;
  2220. if (compiled) return;
  2221. regcomp(&regex, "^https?://([^:/.]+)\\.(2ch\\.net|5ch\\.net|bbspink\\.com)(:[0-9]+)?/([^/]+)/dat/([0-9]+)\\.dat", REG_EXTENDED|REG_ICASE);
  2222. regcomp(&regex_kako, "^https?://([^:/.]+)\\.(2ch\\.net|5ch\\.net|bbspink\\.com)(:[0-9]+)?/([^/]+)/kako/[0-9]+/([0-9]+/)?([0-9]+)\\.dat", REG_EXTENDED|REG_ICASE);
  2223. regcomp(&regex_offlaw, "^https?://([^:/.]+)\\.(2ch\\.net|5ch\\.net|bbspink\\.com)(:[0-9]+)?/test/offlaw2.so\\?.*bbs=([^&]+)", REG_EXTENDED|REG_ICASE);
  2224. regcomp(&regex_api, "^https?://api\\.[25]ch\\.net(:[0-9]+)?/v1/([^/]+)/([^/]+)/([0-9]+)", REG_EXTENDED|REG_ICASE);
  2225. regcomp(&regex_api_auth, "^https?://api\\.[25]ch\\.net(:[0-9]+)?/v1/auth/?$", REG_EXTENDED|REG_ICASE);
  2226. compiled = 1;
  2227. }