BBS2chProxyConnection.cpp 70 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307
  1. #include <string>
  2. #include <vector>
  3. #include <map>
  4. #include <set>
  5. #include <sstream>
  6. #include <stdexcept>
  7. #include <algorithm>
  8. #include <pthread.h>
  9. #include <time.h>
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #include <unistd.h>
  13. #ifdef USE_LUA
  14. #include <lua.hpp>
  15. #endif
  16. #ifdef _WIN32
  17. #include <fcntl.h>
  18. #include <winsock2.h>
  19. #include <ws2tcpip.h>
  20. #include <mswsock.h>
  21. #define CLOSESOCKET(x) closesocket(x)
  22. #else
  23. #include <sys/socket.h>
  24. #include <netinet/in.h>
  25. #include <netdb.h>
  26. #include <arpa/inet.h>
  27. #include <poll.h>
  28. #define CLOSESOCKET(x) close(x)
  29. #endif
  30. #include "BBS2chProxyConnection.h"
  31. #include "DataStorage.h"
  32. #include "hmac.h"
  33. #include "stringEncodingConverter.h"
  34. #include "BBS2chProxyRawSocket.h"
  35. #ifdef USE_MITM
  36. #include "BBS2chProxySecureSocket.h"
  37. #endif
  38. //#define DEBUG 1
  39. extern char *proxy_server;
  40. extern long proxy_port;
  41. extern long proxy_type;
  42. extern long timeout;
  43. extern char *user_agent;
  44. extern char *api_ua_dat;
  45. extern char *x_2ch_ua_dat;
  46. extern char *appKey;
  47. extern char *hmacKey;
  48. extern int allow_chunked;
  49. extern int curl_features;
  50. extern unsigned int curl_version_number;
  51. extern bool accept_https;
  52. extern int force_5chnet;
  53. extern int force_5chnet_https;
  54. extern int force_ipv4;
  55. extern char *bbsmenu_url;
  56. extern char *api_server;
  57. extern std::map<std::string, std::string> bbscgi_headers;
  58. extern int gikofix;
  59. extern CURLSH *curl_share;
  60. extern char *lua_script;
  61. extern unsigned int api_mode;
  62. extern std::vector<std::string> bbscgi_postorder;
  63. extern unsigned int bbscgi_utf8;
  64. extern int api_override;
  65. #ifdef USE_MITM
  66. extern unsigned int mitm_mode;
  67. #endif
  68. extern void log_printf(int level, const char *format ...);
  69. #include "utils.h"
  // Current MonaKey shared by all connections; assigned by setMonaKey() and returned by getMonaKey().
  70. static std::string monaKey;
  // For every non-empty key passed to setMonaKey(), the getCurrentTime() value taken when it was stored.
  71. static std::map<std::string, double> monaKeyIssueTime;
  // Keys recorded by setMonaKey() on a reset with one of its listed reason codes; queried by lua_isExpiredKey().
  72. static std::set<std::string> expiredMonaKeys;
  // Patterns matched against the request URL in connect() to pick the proxy mode.
  // NOTE(review): they are not compiled anywhere in this part of the file - presumably initialised elsewhere.
  // regex: a match selects datProxyMode 1.
  73. static regex_t regex;
  // regex_kako: a match selects datProxyMode 3.
  74. static regex_t regex_kako;
  // regex_offlaw: a match plus a non-empty "key=" parameter selects datProxyMode 3.
  75. static regex_t regex_offlaw;
  // regex_api: only consulted when api_override and appKey are set; a match selects datProxyMode 2.
  76. static regex_t regex_api;
  // regex_api_auth: only consulted when api_override and appKey are set; a match yields a dummy session response.
  77. static regex_t regex_api_auth;
  78. #ifdef USE_LUA
  79. extern "C" {
  80. static int lua_hmacSHA256(lua_State *l)
  81. {
  82. static const char *table = "0123456789abcdef";
  83. size_t keyLength, dataLength;
  84. const char *key = luaL_checklstring(l, 1, &keyLength);
  85. const char *data = luaL_checklstring(l, 2, &dataLength);
  86. if (!key || !data) return 0;
  87. unsigned char digest[32];
  88. char digestStr[65];
  89. proxy2ch_HMAC_SHA256(key, keyLength, data, dataLength, digest);
  90. for (int i=0; i<32; i++) {
  91. unsigned char c = digest[i];
  92. unsigned char upper = (c >> 4) & 0xf;
  93. unsigned char lower = c & 0xf;
  94. digestStr[i*2] = table[upper];
  95. digestStr[i*2+1] = table[lower];
  96. }
  97. digestStr[64] = 0;
  98. lua_pushstring(l, digestStr);
  99. return 1;
  100. }
  101. static int lua_decodeURIComponent(lua_State *l)
  102. {
  103. size_t length;
  104. const char *input = luaL_checklstring(l, 1, &length);
  105. if (!input) return 0;
  106. bool decodePlus = true;
  107. if (!lua_isnoneornil(l, 2)) {
  108. decodePlus = (lua_toboolean(l, 2));
  109. }
  110. std::string output = decodeURIComponent(input, length, decodePlus);
  111. lua_pushstring(l, output.c_str());
  112. return 1;
  113. }
  114. static int lua_encodeURIComponent(lua_State *l)
  115. {
  116. size_t length;
  117. const char *input = luaL_checklstring(l, 1, &length);
  118. if (!input) return 0;
  119. bool spaceAsPlus = true;
  120. if (!lua_isnoneornil(l, 2)) {
  121. spaceAsPlus = (lua_toboolean(l, 2));
  122. }
  123. std::string output = encodeURIComponent(input, length, spaceAsPlus);
  124. lua_pushstring(l, output.c_str());
  125. return 1;
  126. }
  127. static int lua_convertShiftJISToUTF8(lua_State *l)
  128. {
  129. size_t length;
  130. const char *input = luaL_checklstring(l, 1, &length);
  131. if (!input) return 0;
  132. if (length > 0) {
  133. char *output = convertShiftJISToUTF8(input, length);
  134. if (!output) lua_pushnil(l);
  135. else {
  136. lua_pushstring(l, output);
  137. free(output);
  138. }
  139. }
  140. else lua_pushstring(l, "");
  141. return 1;
  142. }
  143. static int lua_isExpiredKey(lua_State *l)
  144. {
  145. size_t length;
  146. const char *input = luaL_checklstring(l, 1, &length);
  147. if (!input) return 0;
  148. if (expiredMonaKeys.count(input)) {
  149. lua_pushboolean(l, 1);
  150. }
  151. else lua_pushboolean(l, 0);
  152. return 1;
  153. }
  154. static int lua_isValidAsUTF8(lua_State *l)
  155. {
  156. size_t length;
  157. const char *input = luaL_checklstring(l, 1, &length);
  158. if (!input) return 0;
  159. lua_pushboolean(l, isValidAsUTF8(input, length));
  160. return 1;
  161. }
  162. }
  163. #endif
  164. void BBS2chProxyConnection::setMonaKey(const std::string &key, int reason) {
  165. /* do nothing when all-zero key is given - is this safe for all cases? */
  166. if (key == "00000000-0000-0000-0000-000000000000") {
  167. return;
  168. }
  169. pthread_mutex_lock(mutex);
  170. if (key.empty()) {
  171. if ((reason >= 3320 && reason <= 3324) || (reason >= 3390 && reason <= 3392)) {
  172. expiredMonaKeys.insert(monaKeyForRequest);
  173. }
  174. log_printf(1, "Reset MonaKey\n");
  175. }
  176. else {
  177. monaKeyIssueTime[key] = getCurrentTime();
  178. log_printf(1, "Updated MonaKey: %s\n", key.c_str());
  179. }
  180. monaKey = key;
  181. pthread_mutex_unlock(mutex);
  182. }
  183. std::string BBS2chProxyConnection::getMonaKey() {
  184. if (monaKey.empty())
  185. return "00000000-0000-0000-0000-000000000000";
  186. return monaKey;
  187. }
  188. void BBS2chProxyConnection::run(void * (*func)(void *))
  189. {
  190. pthread_t thread;
  191. pthread_attr_t thread_attr;
  192. pthread_attr_init(&thread_attr);
  193. pthread_attr_setdetachstate(&thread_attr , PTHREAD_CREATE_DETACHED);
  194. if(0 != pthread_create(&thread , &thread_attr , func , this))
  195. perror("pthread_create");
  196. pthread_attr_destroy(&thread_attr);
  197. }
  // State handed from tunnel() to the tunnelMain() thread; tunnelMain() deletes it when the tunnel ends.
  198. struct TunnelSockets {
  // Socket connected to the proxy client (tunnel() copies the connection's sock_c here).
  199. int sock_c;
  // Socket connected to the destination server (created and connected in tunnel()).
  200. int sock_s;
  // Destination host as passed to tunnel(); tunnelMain() only uses it for its log message.
  201. std::string addr;
  // Destination port as passed to tunnel(); tunnelMain() only uses it for its log message.
  202. int port;
  203. };
  204. static void *tunnelMain(void *param)
  205. {
  206. TunnelSockets *sockets = (TunnelSockets *)param;
  207. char *buf = new char[16384];
  208. #ifdef _WIN32
  209. fd_set fds;
  210. int nfds = sockets->sock_c > sockets->sock_s ? sockets->sock_c + 1 : sockets->sock_s + 1;
  211. #else
  212. struct pollfd fds[2];
  213. memset(fds, 0, sizeof(fds));
  214. fds[0].fd = sockets->sock_c;
  215. fds[0].events = POLLIN;
  216. fds[1].fd = sockets->sock_s;
  217. fds[1].events = POLLIN;
  218. #endif
  219. while (1) {
  220. #ifdef _WIN32
  221. FD_ZERO(&fds);
  222. FD_SET(sockets->sock_c, &fds);
  223. FD_SET(sockets->sock_s, &fds);
  224. if (select(nfds, &fds, NULL, NULL, NULL) < 0) break;
  225. if (FD_ISSET(sockets->sock_c, &fds)) {
  226. int ret = recv(sockets->sock_c, buf, 16384, 0);
  227. if (ret > 0) send(sockets->sock_s, buf, ret, 0);
  228. else if (ret <= 0) break;
  229. }
  230. if (FD_ISSET(sockets->sock_s, &fds)) {
  231. int ret = recv(sockets->sock_s, buf, 16384, 0);
  232. if (ret > 0) send(sockets->sock_c, buf, ret, 0);
  233. else if (ret <= 0) break;
  234. }
  235. #else
  236. if (poll(fds, 2, -1) < 0) break;
  237. if (fds[0].revents & POLLIN) {
  238. int ret = recv(sockets->sock_c, buf, 16384, 0);
  239. if (ret > 0) send(sockets->sock_s, buf, ret, 0);
  240. else if (ret <= 0) break;
  241. }
  242. else if (fds[0].revents != 0) break;
  243. if (fds[1].revents & POLLIN) {
  244. int ret = recv(sockets->sock_s, buf, 16384, 0);
  245. if (ret > 0) send(sockets->sock_c, buf, ret, 0);
  246. else if (ret <= 0) break;
  247. }
  248. else if (fds[1].revents != 0) break;
  249. #endif
  250. }
  251. CLOSESOCKET(sockets->sock_c);
  252. CLOSESOCKET(sockets->sock_s);
  253. log_printf(1, "Finished tunneling to %s:%d\n", sockets->addr.c_str(), sockets->port);
  254. delete sockets;
  255. delete[] buf;
  256. return NULL;
  257. }
  258. int BBS2chProxyConnection::tunnel(const char *addr, int port)
  259. {
  260. struct sockaddr_in server;
  261. memset(&server, 0, sizeof(server));
  262. server.sin_family = AF_INET;
  263. server.sin_addr.s_addr = inet_addr(addr);
  264. server.sin_port = htons(port);
  265. if(server.sin_addr.s_addr == 0xffffffff) {
  266. struct hostent *host;
  267. host = gethostbyname(addr);
  268. if (host == NULL) {
  269. log_printf(0, "Failed to lookup hostname %s\n", addr);
  270. sendResponse(400, "Bad Request", socketToClient);
  271. return 400;
  272. }
  273. server.sin_addr.s_addr = *(unsigned int *)host->h_addr_list[0];
  274. }
  275. log_printf(1,"Tunneling connection to %s:%d\n",addr,port);
  276. int sock_s = socket(AF_INET, SOCK_STREAM, 0);
  277. if(-1 == ::connect(sock_s, (struct sockaddr *)&server, sizeof(server))) {
  278. perror("connect");
  279. sendResponse(400, "Bad Request", socketToClient);
  280. return 400;
  281. }
  282. send(sock_c, "HTTP/1.1 200 Connection established\r\n\r\n", 39, 0);
  283. TunnelSockets *sockets = new TunnelSockets();
  284. sockets->sock_c = sock_c;
  285. sockets->sock_s = sock_s;
  286. sockets->addr = addr;
  287. sockets->port = port;
  288. pthread_t thread;
  289. if(0 != pthread_create(&thread, NULL, tunnelMain, sockets))
  290. perror("pthread_create");
  291. pthread_detach(thread);
  292. return 0;
  293. }
  294. void BBS2chProxyConnection::connect(void)
  295. {
  296. char method[32], url[1024], protocol[32];
  297. int i;
  298. char *buf, *ptr;
  299. unsigned int datProxyMode = 0; // 0: no dat, 1: read.cgi or API, 2: force API, 3: kakolog
  300. regmatch_t match[7];
  301. long statusCode = 0;
  302. BBS2chProxyURL baseURL;
  303. BBS2chProxyHttpHeaders requestHeaders;
  304. socketToClient = new BBS2chProxyRawSocket(sock_c);
  305. buf = (char *)malloc(16384);
  306. if(!buf) goto end;
  307. beginHandleRequest:
  308. ptr = buf;
  309. if(!socketToClient->readLine(buf, 1024)) {
  310. sendResponse(400, "Bad Request", socketToClient);
  311. statusCode = 400;
  312. goto end;
  313. }
  314. i=0;
  315. while(*ptr != ' ' && *ptr != 0 && i < 32) method[i++] = *ptr++;
  316. if(*ptr == 0 || i == 32) {
  317. sendResponse(400, "Bad Request", socketToClient);
  318. statusCode = 400;
  319. goto end;
  320. }
  321. method[i] = 0;
  322. ptr++;
  323. i=0;
  324. while(*ptr != ' ' && *ptr != 0 && i < 1024) url[i++] = *ptr++;
  325. if(*ptr == 0 || i == 1024) {
  326. sendResponse(400, "Bad Request", socketToClient);
  327. statusCode = 400;
  328. goto end;
  329. }
  330. url[i] = 0;
  331. ptr++;
  332. i=0;
  333. while(*ptr != '\r' && *ptr != '\n' && *ptr != 0 && i < 32) protocol[i++] = *ptr++;
  334. if(*ptr == 0 || i == 32) {
  335. sendResponse(400, "Bad Request", socketToClient);
  336. statusCode = 400;
  337. goto end;
  338. }
  339. protocol[i] = 0;
  340. if(!strncasecmp(protocol,"HTTP/1.0",8)) {
  341. isClientHttp1_0 = true;
  342. }
  343. else isClientHttp1_0 = false;
  344. log_printf(1, "Received %s %s %s\n",method,url,protocol);
  345. if(strcasecmp(method,"GET") && strcasecmp(method,"POST") && strcasecmp(method,"HEAD") && strcasecmp(method,"CONNECT") && strcasecmp(method,"PUT") && strcasecmp(method, "OPTIONS")) {
  346. sendResponse(400, "Bad Request", socketToClient);
  347. statusCode = 400;
  348. goto end;
  349. }
  350. if(!url[0]) {
  351. sendResponse(400, "Bad Request", socketToClient);
  352. statusCode = 400;
  353. goto end;
  354. }
  355. if(strncasecmp(protocol,"HTTP",4)) {
  356. sendResponse(400, "Bad Request", socketToClient);
  357. statusCode = 400;
  358. goto end;
  359. }
  360. if(!strcasecmp(method,"CONNECT")) {
  361. if(!accept_https || baseURL.isValid()) {
  362. sendResponse(400, "Bad Request", socketToClient);
  363. statusCode = 400;
  364. goto end;
  365. }
  366. while(socketToClient->readLine(buf, 16384)) {
  367. if(!strcmp("\r\n",buf)) break;
  368. }
  369. int port = 443;
  370. char *ptr = strchr(url, ':');
  371. if(ptr) {
  372. *ptr = 0;
  373. port = atoi(ptr+1);
  374. }
  375. #ifdef USE_MITM
  376. bool useMITM = false;
  377. if (mitm_mode) {
  378. baseURL = BBS2chProxyURL("https", url);
  379. if (mitm_mode == 2) useMITM = true;
  380. else if (mitm_mode == 1 && baseURL.isFamilyOf5chNet()) useMITM = true;
  381. }
  382. if (useMITM) {
  383. socketToClient->writeString("HTTP/1.1 200 Connection established\r\n\r\n");
  384. if (port == 80) {
  385. baseURL.setScheme("http");
  386. goto beginHandleRequest;
  387. }
  388. else {
  389. try {
  390. BBS2chProxySecureSocket *secureSocket = new BBS2chProxySecureSocket(sock_c, url);
  391. delete socketToClient;
  392. socketToClient = secureSocket;
  393. if (port != 443) baseURL.setPort(port);
  394. isHttps = true;
  395. goto beginHandleRequest;
  396. } catch (const std::runtime_error& e) {
  397. log_printf(0, "%s\n", e.what());
  398. sendResponse(400, "Bad Request", socketToClient);
  399. statusCode = 400;
  400. goto end;
  401. }
  402. }
  403. }
  404. else
  405. #endif
  406. {
  407. statusCode = tunnel(url, port);
  408. /* if a return value is non-zero, tunnel function failed to establish a tunnelling connection */
  409. if (statusCode == 0) {
  410. delete socketToClient;
  411. socketToClient = NULL;
  412. }
  413. goto end;
  414. }
  415. }
  416. #if USE_MITM
  417. if (baseURL.isValid()) {
  418. requestURL = BBS2chProxyURL(baseURL, url);
  419. log_printf(1, "Running as MITM proxy for %s\n", requestURL.absoluteString().c_str());
  420. } else
  421. #endif
  422. requestURL = BBS2chProxyURL(url);
  423. if (!requestURL.isHttp()) {
  424. sendResponse(400, "Bad Request", socketToClient);
  425. statusCode = 400;
  426. goto end;
  427. }
  428. if (force_5chnet) {
  429. if (requestURL.getHost() != "menu.2ch.net" && requestURL.replaceHost("2ch.net", "5ch.net")) {
  430. force5ch = true;
  431. log_printf(1, "Detected *.2ch.net URL, changed target URL to %s\n", requestURL.absoluteString().c_str());
  432. }
  433. }
  434. /* parse request headers */
  435. while (socketToClient->readLine(buf, 16384)) {
  436. if (!strcmp("\r\n",buf)) break;
  437. requestHeaders.add(buf);
  438. }
  439. if (requestHeaders.hasNameAndValue("Transfer-Encoding", "chunked")) {
  440. isClientChunked = true;
  441. }
  442. else if (requestHeaders.has("Content-Length")) {
  443. content_length = atoi(requestHeaders.get("Content-Length").c_str());
  444. }
  445. if (requestHeaders.has("Expect")) {
  446. if (!strcasecmp(requestHeaders.get("Expect").c_str(), "100-continue") && !isClientHttp1_0) {
  447. log_printf(1, "Received Expect: 100-continue header, sending 100 Continue response to the client\n");
  448. socketToClient->writeString("HTTP/1.1 100 Continue\r\n\r\n");
  449. }
  450. }
  451. if (regexec(&regex, requestURL.absoluteString().c_str(), 6, match, 0) != REG_NOMATCH) {
  452. if ((appKey && (api_mode & 1)) || !requestURL.hostStartsWith("headline.")) datProxyMode = 1;
  453. }
  454. else if (regexec(&regex_kako, requestURL.absoluteString().c_str(), 7, match, 0) != REG_NOMATCH) {
  455. datProxyMode = 3;
  456. }
  457. else if (regexec(&regex_offlaw, requestURL.absoluteString().c_str(), 5, match, 0) != REG_NOMATCH) {
  458. const char *tmp = requestURL.absoluteString().c_str();
  459. const char *thread = strstr(tmp, "key=");
  460. if (thread) {
  461. match[6].rm_so = thread+4-tmp;
  462. match[6].rm_eo = thread+4-tmp;
  463. const char *ptr = thread+4;
  464. while (*ptr != '&' && *ptr != 0) {
  465. ptr++;
  466. match[6].rm_eo++;
  467. }
  468. if (match[6].rm_so != match[6].rm_eo) datProxyMode = 3;
  469. }
  470. }
  471. else if (api_override && appKey) {
  472. if (regexec(&regex_api, requestURL.absoluteString().c_str(), 5, match, 0) != REG_NOMATCH) {
  473. datProxyMode = 2;
  474. match[1] = match[2];
  475. match[5] = match[4];
  476. match[4] = match[3];
  477. }
  478. else if (regexec(&regex_api_auth, requestURL.absoluteString().c_str(), 2, match, 0) != REG_NOMATCH) {
  479. /* return dummy response immediately */
  480. log_printf(1, "Returning dummy response because API overriding is enabled\n");
  481. statusCode = 200;
  482. sendBasicHeaders(200, "OK", socketToClient);
  483. if (0 >= socketToClient->writeString("Content-Type: text/plain\r\n")) goto end;
  484. if (0 >= socketToClient->writeString("Content-Length: 203\r\n\r\n")) goto end;
  485. if (0 >= socketToClient->writeString("SESSION-ID=Monazilla/1.00:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000")) goto end;
  486. goto end;
  487. }
  488. }
  489. if (datProxyMode) {
  490. // match[1] : hostPrefix
  491. // match[2] : host
  492. // match[4] : bbs
  493. // match[5 or 6] : key
  494. const char *original = requestURL.absoluteString().c_str();
  495. const regmatch_t *threadMatch = datProxyMode != 3 ? match+5 : match+6;
  496. std::string board(original + match[4].rm_so, match[4].rm_eo - match[4].rm_so);
  497. std::string thread(original + threadMatch->rm_so, threadMatch->rm_eo - threadMatch->rm_so);
  498. if (!appKey || (datProxyMode == 1 && !(api_mode & 1)) || datProxyMode == 3) {
  499. log_printf(1, "Retrieving thread via read.cgi...\n");
  500. std::string host(original + match[1].rm_so, match[2].rm_eo - match[1].rm_so);
  501. threadKey = host;
  502. threadKey += '/';
  503. threadKey += board;
  504. threadKey += '/';
  505. threadKey += thread;
  506. std::string targetURL = (force_5chnet_https || isHttps) ? "https://" : "http://";
  507. targetURL += host;
  508. targetURL += "/test/read.cgi/";
  509. targetURL += board;
  510. targetURL += '/';
  511. targetURL += thread;
  512. targetURL += '/';
  513. if (force_5chnet_https) isHttps = true;
  514. statusCode = datProxy(targetURL.c_str(), method, requestHeaders);
  515. }
  516. else {
  517. log_printf(1, "Retrieving thread via API...\n");
  518. std::string hostPrefix(original + match[1].rm_so, match[1].rm_eo - match[1].rm_so);
  519. std::string targetURL = "https://";
  520. targetURL += api_server;
  521. targetURL += "/v1/";
  522. targetURL += hostPrefix;
  523. targetURL += '/';
  524. targetURL += board;
  525. targetURL += '/';
  526. targetURL += thread;
  527. isHttps = true;
  528. statusCode = datProxyAPI(targetURL.c_str(), method, requestHeaders);
  529. }
  530. }
  531. else {
  532. if (force_5chnet_https && !isHttps && requestURL.isFamilyOf5chNet()) {
  533. requestURL.setScheme("https");
  534. isHttps = true;
  535. log_printf(1, "The host %s is 5ch.net family, connecting with HTTPS\n", requestURL.getHost().c_str());
  536. }
  537. if (bbsmenu_url && requestURL.equals(BBS2chProxyURL(bbsmenu_url), true)) {
  538. log_printf(1, "Running as a BBS menu proxy...\n");
  539. statusCode = bbsmenuProxy(requestURL.absoluteString().c_str(), method, requestHeaders);
  540. }
  541. else {
  542. bool isPostRequest = !strcasecmp(method, "POST");
  543. bool isPutRequest = !strcasecmp(method, "PUT");
  544. if (isPostRequest && requestURL.isFamilyOf5chNet() && requestURL.pathStartsWith("/test/bbs.cgi")) bbscgi = true;
  545. if (bbscgi) log_printf(1, "Looks like a request to bbs.cgi, will be modified before sending...\n");
  546. else log_printf(1, "Not a notable request, will be forwarded to server...\n");
  547. if (force_5chnet) {
  548. if (requestHeaders.has("Host")) {
  549. std::string host = requestHeaders.get("Host");
  550. size_t pos = host.find("2ch.net");
  551. if (pos != std::string::npos && pos+7 == host.length()) {
  552. if (pos == 0 || host[pos-1] == '.') {
  553. host.replace(pos, 1, "5");
  554. requestHeaders.set("Host", host);
  555. }
  556. }
  557. }
  558. if (bbscgi && requestHeaders.has("Referer")) {
  559. std::string referrer = requestHeaders.get("Referer");
  560. size_t pos = referrer.find("2ch.net");
  561. if (pos != std::string::npos) {
  562. if (pos == 0 || referrer[pos-1] == '.') {
  563. referrer.replace(pos, 1, "5");
  564. requestHeaders.set("Referer", referrer);
  565. }
  566. }
  567. }
  568. }
  569. requestHeaders.remove("Connection");
  570. if (user_agent) requestHeaders.remove("User-Agent");
  571. if (bbscgi && (content_length > 0 || isClientChunked)) {
  572. bool isNotFormURLEncoded = false;
  573. if (requestHeaders.has("Content-Type") && requestHeaders.get("Content-Type").find("application/x-www-form-urlencoded") == std::string::npos) {
  574. isNotFormURLEncoded = true;
  575. }
  576. if (!isNotFormURLEncoded) {
  577. requestHeaders.remove("Content-Length");
  578. if (!bbscgi_headers.empty()) {
  579. for (std::map<std::string, std::string>::iterator it = bbscgi_headers.begin(); it != bbscgi_headers.end(); it++) {
  580. if (requestHeaders.has(it->first)) {
  581. log_printf(1, "Ignoring header \"%s\" because custom header will be appended\n", it->first.c_str());
  582. requestHeaders.remove(it->first);
  583. }
  584. }
  585. }
  586. char *postdata = NULL;
  587. if (isClientChunked) {
  588. content_length = readChunkedBodyIntoBuffer(&postdata, socketToClient);
  589. requestHeaders.remove("Transfer-Encoding");
  590. }
  591. else {
  592. postdata = (char *)calloc(content_length+1, 1);
  593. content_length = socketToClient->read(postdata, content_length);
  594. }
  595. if (gikofix) {
  596. char *ptr = postdata+content_length-1;
  597. while (ptr >= postdata && (*ptr == '\r' || *ptr == '\n')) {
  598. *ptr-- = 0;
  599. }
  600. }
  601. curl_slist *headersForCurl = NULL;
  602. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl);
  603. statusCode = bbsCgiProxy(requestURL.absoluteString().c_str(), requestHeaders, postdata);
  604. free(postdata);
  605. curl_slist_free_all(headersForCurl);
  606. goto end;
  607. }
  608. }
  609. curl_slist *headersForCurl = NULL;
  610. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl);
  611. if (!requestHeaders.has("Expect")) {
  612. headersForCurl = curl_slist_append(headersForCurl, "Expext:");
  613. }
  614. if(curl) {
  615. CURLcode res;
  616. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  617. curl_easy_setopt(curl, CURLOPT_URL, requestURL.absoluteString().c_str());
  618. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  619. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  620. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_proxy);
  621. curl_easy_setopt(curl, CURLOPT_HEADERDATA, this);
  622. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_proxy);
  623. curl_easy_setopt(curl, CURLOPT_WRITEDATA, this);
  624. if(content_length) {
  625. /* set Content-Length explicitly via API to work properly with curl >= 7.66.0 */
  626. if(isPostRequest)
  627. curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, content_length);
  628. else if(isPutRequest)
  629. curl_easy_setopt(curl, CURLOPT_INFILESIZE, content_length);
  630. }
  631. curl_easy_setopt(curl, CURLOPT_READFUNCTION, read_callback_proxy);
  632. curl_easy_setopt(curl, CURLOPT_READDATA, this);
  633. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  634. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  635. //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
  636. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  637. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  638. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headersForCurl);
  639. if(user_agent) {
  640. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  641. }
  642. if(isPostRequest) {
  643. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  644. }
  645. else if(isPutRequest) {
  646. curl_easy_setopt(curl, CURLOPT_UPLOAD, 1L);
  647. }
  648. else if(!strcasecmp(method, "HEAD")) {
  649. curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
  650. }
  651. else if(!strcasecmp(method, "OPTIONS")) {
  652. curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "OPTIONS");
  653. }
  654. if(proxy_server) {
  655. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  656. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  657. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  658. }
  659. res = curl_easy_perform(curl);
  660. if(res != CURLE_OK) {
  661. log_printf(0, "curl error: %s (%s)\n", curl_easy_strerror(res), requestURL.absoluteString().c_str());
  662. if(!status) sendResponse(503, "Service Unavailable", socketToClient);
  663. statusCode = 503;
  664. }
  665. else {
  666. if(isResponseChunked) {
  667. socketToClient->writeString("0\r\n\r\n");
  668. }
  669. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  670. }
  671. curl_easy_reset(curl);
  672. }
  673. curl_slist_free_all(headersForCurl);
  674. }
  675. }
  676. end:
  677. if(statusCode) log_printf(1, "Returned status code %d to client\n",statusCode);
  678. if(buf) free(buf);
  679. if(socketToClient) socketToClient->close();
  680. }
  681. int BBS2chProxyConnection::datProxy(const char *url, const char *method, BBS2chProxyHttpHeaders &requestHeaders)
  682. {
  683. DataStorage *html = NULL;
  684. long statusCode = 0;
  685. long rangeStart = 0, rangeEnd = 0;
  686. time_t lastModified = 0;
  687. time_t ifModifiedSince = 0;
  688. char *buf = (char *)malloc(16384);
  689. if(!buf) goto last;
  690. if(requestHeaders.has("Range")) {
  691. std::string value = requestHeaders.get("Range");
  692. if(value.find("bytes=") == 0 && value.find(",") == std::string::npos) {
  693. char *ptr = (char *)value.c_str() + 6;
  694. if(*ptr == '-') {
  695. rangeStart = atoi(ptr);
  696. }
  697. else {
  698. rangeStart = strtol(ptr, &ptr, 10);
  699. if(*ptr == '-') ptr++;
  700. if(*ptr && *ptr != '\r') {
  701. rangeEnd = strtol(ptr, NULL, 10);
  702. if(rangeEnd && rangeStart > rangeEnd) {
  703. sendResponse(416, "Requested range not satisfiable", socketToClient);
  704. statusCode = 416;
  705. goto last;
  706. }
  707. }
  708. }
  709. //fprintf(stderr, "range=%ld-%ld\n",rangeStart,rangeEnd);
  710. }
  711. else {
  712. sendResponse(416, "Requested range not satisfiable", socketToClient);
  713. statusCode = 416;
  714. goto last;
  715. }
  716. }
  717. if(requestHeaders.has("If-Modified-Since")) {
  718. struct tm time_ = {};
  719. strptime(requestHeaders.get("If-Modified-Since").c_str(), httpTimestampFmt, &time_);
  720. ifModifiedSince = mktime(&time_);
  721. }
  722. if(rangeStart > 0) {
  723. PBBS2chProxyThreadInfo info;
  724. pthread_mutex_lock(mutex);
  725. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  726. if(it != threadCache->end()) {
  727. info = it->second;
  728. }
  729. pthread_mutex_unlock(mutex);
  730. log_printf(5,"range request from %ld bytes\n",rangeStart);
  731. if(info) {
  732. int from = info->lastResNum;
  733. int alreadyRead = info->cachedSize;
  734. int lastResLength = info->cachedData->length;
  735. log_printf(5,"hit %s: cached %d bytes, last res size %d\n",threadKey.c_str(),alreadyRead,lastResLength);
  736. if(rangeStart <= alreadyRead && rangeStart >= alreadyRead - lastResLength) {
  737. if(curl) {
  738. CURLcode res;
  739. DataStorage *dat = new DataStorage();
  740. log_printf(5,"partial access from res num %d\n",from);
  741. snprintf(buf,16384,"%s%d-n",url,from);
  742. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  743. curl_easy_setopt(curl, CURLOPT_URL, buf);
  744. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  745. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  746. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  747. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  748. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  749. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  750. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  751. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  752. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  753. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  754. if(proxy_server) {
  755. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  756. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  757. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  758. }
  759. if(user_agent) {
  760. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  761. }
  762. else if(requestHeaders.has("User-Agent")) {
  763. curl_easy_setopt(curl, CURLOPT_USERAGENT, requestHeaders.get("User-Agent").c_str());
  764. }
  765. res = curl_easy_perform(curl);
  766. if(res == CURLE_OK) {
  767. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  768. curl_easy_reset(curl);
  769. if(statusCode == 200 && dat->length) {
  770. DataStorage *updated = html2dat(dat, from, &lastModified, true);
  771. if(ifModifiedSince && lastModified && updated && updated->length == lastResLength) {
  772. struct tm time_ = {};
  773. gmtime_r(&lastModified,&time_);
  774. time_t tmp = mktime(&time_);
  775. if(ifModifiedSince >= tmp) {
  776. sendResponse(304, "Not Modified", socketToClient);
  777. log_printf(5,"not modified!\n");
  778. delete updated;
  779. delete dat;
  780. statusCode = 304;
  781. goto last;
  782. }
  783. }
  784. if(updated && updated->length && updated->length >= lastResLength) {
  785. html = new DataStorage(alreadyRead - lastResLength);
  786. html->appendBytes(updated->bytes, updated->length);
  787. if(!rangeEnd) rangeEnd = html->length - 1;
  788. if(rangeStart > rangeEnd) {
  789. sendResponse(416, "Requested range not satisfiable", socketToClient);
  790. delete updated;
  791. delete dat;
  792. statusCode = 416;
  793. goto last;
  794. }
  795. statusCode = 206;
  796. log_printf(5,"cache hit; reconstructed data length:%ld\n",(long)html->length);
  797. }
  798. else {
  799. log_printf(5,"cache misshit?\n");
  800. sendResponse(416, "Requested range not satisfiable", socketToClient);
  801. delete updated;
  802. delete dat;
  803. statusCode = 416;
  804. goto last;
  805. }
  806. delete updated;
  807. }
  808. }
  809. else {
  810. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),buf);
  811. curl_easy_reset(curl);
  812. }
  813. delete dat;
  814. if(html) goto resp;
  815. }
  816. }
  817. else {
  818. log_printf(5,"invalid cache contents\n");
  819. pthread_mutex_lock(mutex);
  820. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  821. if(it != threadCache->end()) {
  822. threadCache->erase(it);
  823. }
  824. pthread_mutex_unlock(mutex);
  825. }
  826. }
  827. }
  828. {
  829. if(curl) {
  830. CURLcode res;
  831. DataStorage *dat = new DataStorage();
  832. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  833. curl_easy_setopt(curl, CURLOPT_URL, url);
  834. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  835. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  836. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  837. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  838. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  839. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  840. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  841. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  842. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  843. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  844. if(proxy_server) {
  845. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  846. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  847. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  848. }
  849. if(user_agent) {
  850. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  851. }
  852. else if(requestHeaders.has("User-Agent")) {
  853. curl_easy_setopt(curl, CURLOPT_USERAGENT, requestHeaders.get("User-Agent").c_str());
  854. }
  855. res = curl_easy_perform(curl);
  856. if(res != CURLE_OK) {
  857. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),url);
  858. sendResponse(503, "Service Unavailable", socketToClient);
  859. curl_easy_reset(curl);
  860. delete dat;
  861. statusCode = 503;
  862. goto last;
  863. }
  864. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  865. curl_easy_reset(curl);
  866. if(statusCode == 200) {
  867. html = html2dat(dat, 1, &lastModified, false);
  868. }
  869. delete dat;
  870. }
  871. if(!html || !html->length) {
  872. sendResponse(503, "Service Unavailable", socketToClient);
  873. statusCode = 503;
  874. goto last;
  875. }
  876. if((rangeStart || rangeEnd) && html && html->length) {
  877. if(!rangeEnd) rangeEnd = html->length - 1;
  878. if(rangeStart < 0) rangeStart = html->length + rangeStart;
  879. if(rangeStart < html->length && rangeEnd < html->length && rangeStart <= rangeEnd) {
  880. statusCode = 206;
  881. }
  882. else {
  883. if(ifModifiedSince && lastModified && rangeStart == html->length) {
  884. struct tm time_ = {};
  885. gmtime_r(&lastModified,&time_);
  886. time_t tmp = mktime(&time_);
  887. if(ifModifiedSince >= tmp) {
  888. sendResponse(304, "Not Modified", socketToClient);
  889. log_printf(5,"not modified!\n");
  890. statusCode = 304;
  891. goto last;
  892. }
  893. }
  894. sendResponse(416, "Requested range not satisfiable", socketToClient);
  895. statusCode = 416;
  896. goto last;
  897. }
  898. }
  899. }
  900. resp:
  901. if(statusCode == 206) sendBasicHeaders(statusCode,"Partial Content",socketToClient);
  902. else sendBasicHeaders(statusCode,"OK",socketToClient);
  903. if(0 >= socketToClient->writeString("Content-Type: text/plain\r\n")) goto last;
  904. if(0 >= socketToClient->writeString("Accept-Ranges: bytes\r\n")) goto last;
  905. if(statusCode == 206) {
  906. std::ostringstream ss;
  907. ss << "Content-Range: bytes " << rangeStart << "-" << rangeEnd << "/" << html->length << "\r\n";
  908. if (0 >= socketToClient->writeString(ss.str())) goto last;
  909. //fprintf(stderr,"Content-Length: %ld\r\n",rangeEnd - rangeStart + 1);
  910. //fprintf(stderr,"Content-Range: bytes %ld-%ld/%ld\r\n",rangeStart,rangeEnd,(long)html->length);
  911. DataStorage *newHtml = new DataStorage();
  912. newHtml->appendBytes(html->bytes+rangeStart, rangeEnd - rangeStart + 1);
  913. delete html;
  914. html = newHtml;
  915. }
  916. {
  917. std::ostringstream ss;
  918. ss << "Content-Length: " << html->length << "\r\n";
  919. if(0 >= socketToClient->writeString(ss.str())) goto last;
  920. }
  921. if(lastModified) {
  922. struct tm time_ = {};
  923. char date[256];
  924. gmtime_r(&lastModified,&time_);
  925. strftime(date,256,httpTimestampFmt,&time_);
  926. std::string header = "Last-Modified: ";
  927. header += date;
  928. header += "\r\n";
  929. if(0 >= socketToClient->writeString(header)) goto last;
  930. //fprintf(stderr,"Last-Modified: %s\r\n",date);
  931. }
  932. if(0 > socketToClient->writeString("\r\n")) goto last;
  933. if(html && statusCode >= 200 && statusCode < 300 && strcasecmp(method, "HEAD")) {
  934. if(html->length > socketToClient->write(html->bytes, html->length)) goto last;
  935. }
  936. last:
  937. if(buf) free(buf);
  938. if(html) delete html;
  939. return statusCode;
  940. }
  941. DataStorage *BBS2chProxyConnection::html2dat_old(DataStorage *html, int startResNum, time_t *lastModified, bool useCache)
  942. {
  943. char *ptr = html->bytes;
  944. char *end = html->bytes + html->length - 1;
  945. DataStorage *txt = new DataStorage();
  946. int res = startResNum, i=0;
  947. char signature[32];
  948. char title[1024];
  949. int cachedSize = 0;
  950. bool bbspink = strstr(threadKey.c_str(),"bbspink.com") ? true : false;
  951. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<title>", 7);
  952. if(!ptr) {
  953. delete txt;
  954. return NULL;
  955. }
  956. ptr += 7;
  957. while(1) {
  958. if(*ptr == '<') {
  959. if(!strncasecmp(ptr,"</title>",8)) {
  960. ptr += 8;
  961. break;
  962. }
  963. else title[i++] = *ptr++;
  964. }
  965. else title[i++] = *ptr++;
  966. }
  967. title[i] = 0;
  968. snprintf(signature,32,"<dt>%d ",res);
  969. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  970. if(!ptr) {
  971. delete txt;
  972. return NULL;
  973. }
  974. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  975. if(!buffer) {
  976. delete txt;
  977. return NULL;
  978. }
  979. unsigned char *body = buffer;
  980. char *mail = (char *)body + 65536;
  981. char *name = mail + 1024;
  982. char *date = name + 1024;
  983. char *encrypted = date + 1024;
  984. while(ptr < end) {
  985. //fprintf(stderr,"%s\n",signature);
  986. DataStorage *resData = new DataStorage();
  987. i=0;
  988. mail[0] = 0;
  989. ptr = strstr(ptr,signature);
  990. ptr += strlen(signature);
  991. while(*ptr != '<') ptr++;
  992. ptr++;
  993. const char *endStr;
  994. if(*ptr == 'a' || *ptr == 'A') {
  995. replay:
  996. // has mail
  997. while(*ptr != '"') ptr++;
  998. ptr++;
  999. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  1000. ptr += 28;
  1001. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  1002. encrypted[i] = 0;
  1003. i = decryptMail((unsigned char *)mail,encrypted);
  1004. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  1005. ptr -= reconstruct_len;
  1006. char *start = ptr;
  1007. memcpy(ptr, "<a href=\"mailto:", 16);
  1008. ptr += 16;
  1009. memcpy(ptr, mail, i);
  1010. ptr = start;
  1011. i=0;
  1012. goto replay;
  1013. }
  1014. else {
  1015. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  1016. while(*ptr != '"') mail[i++] = *ptr++;
  1017. mail[i] = 0;
  1018. }
  1019. endStr = "</a>";
  1020. }
  1021. else if(*ptr == 'b') {
  1022. endStr = NULL;
  1023. }
  1024. else {
  1025. endStr = "</font>";
  1026. }
  1027. if(endStr) {
  1028. ptr = strstr(ptr,"<b>");
  1029. ptr += 3;
  1030. }
  1031. else {
  1032. ptr = strchr(ptr,'>');
  1033. ptr++;
  1034. }
  1035. i=0;
  1036. while(1) {
  1037. if(*ptr == '<') {
  1038. if(!strncasecmp(ptr,"</b>",4) && (!endStr || !strncasecmp(ptr+4,endStr,strlen(endStr)))) {
  1039. ptr += 4;
  1040. if(endStr) ptr += strlen(endStr);
  1041. break;
  1042. }
  1043. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  1044. int j=0;
  1045. ptr = strstr(ptr,"data-cfemail=\"");
  1046. ptr += 14;
  1047. while(*ptr != '"') encrypted[j++] = *ptr++;
  1048. encrypted[j] = 0;
  1049. j = decryptMail((unsigned char *)name+i,encrypted);
  1050. i += j;
  1051. ptr = strstr(ptr,"</script>");
  1052. ptr += 9;
  1053. }
  1054. else name[i++] = *ptr++;
  1055. }
  1056. else name[i++] = *ptr++;
  1057. }
  1058. resData->appendBytes(name, i);
  1059. resData->appendBytes("<>", 2);
  1060. if(mail[0]) resData->appendBytes(mail ,strlen(mail));
  1061. resData->appendBytes("<>", 2);
  1062. ptr += 2;
  1063. i=0;
  1064. while(1) {
  1065. if(*ptr == '<') {
  1066. if(!strncasecmp(ptr,"<dd>",4)) {
  1067. ptr += 4;
  1068. break;
  1069. }
  1070. else if(!strncmp(ptr,"<a href=\"javascript:be(",23)) {
  1071. memcpy(date+i,"BE:",3);
  1072. ptr += 23;
  1073. i += 3;
  1074. while(*ptr != ')') date[i++] = *ptr++;
  1075. date[i++] = '-';
  1076. ptr = strchr(ptr,'?');
  1077. ptr++;
  1078. char *tmp = strstr(ptr,"</a>");
  1079. memcpy(date+i,ptr,tmp-ptr);
  1080. i += tmp-ptr;
  1081. ptr = tmp + 4;
  1082. }
  1083. else date[i++] = *ptr++;
  1084. }
  1085. else date[i++] = *ptr++;
  1086. }
  1087. resData->appendBytes(date ,i);
  1088. resData->appendBytes("<>", 2);
  1089. i=0;
  1090. while(1) {
  1091. if(*ptr == '<') {
  1092. if(!strncasecmp(ptr,"<br><br>\n",9)) {
  1093. ptr += 9;
  1094. break;
  1095. }
  1096. else if(!strncasecmp(ptr,"<dt>",4) || !strncasecmp(ptr,"</dl>",5)) {
  1097. while(i>0 &&body[i-1] == '\n') i--;
  1098. break;
  1099. }
  1100. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  1101. int j=0;
  1102. ptr = strstr(ptr,"data-cfemail=\"");
  1103. ptr += 14;
  1104. while(*ptr != '"') encrypted[j++] = *ptr++;
  1105. encrypted[j] = 0;
  1106. j = decryptMail(body+i,encrypted);
  1107. i += j;
  1108. ptr = strstr(ptr,"</script>");
  1109. ptr += 9;
  1110. }
  1111. else if(!strncmp(ptr,"<a href=\"http",13)) {
  1112. ptr = strchr(ptr,'>');
  1113. ptr++;
  1114. char *link = ptr;
  1115. ptr = strstr(link,"</a>");
  1116. memcpy(body+i,link,ptr-link);
  1117. i += ptr-link;
  1118. ptr += 4;
  1119. }
  1120. else if(!strncmp(ptr,"<img src=\"",10)) {
  1121. ptr += 10;
  1122. char *img = ptr;
  1123. ptr = strstr(img,"\">");
  1124. memcpy(body+i,img,ptr-img);
  1125. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  1126. int length = ptr-img;
  1127. while(*img != '/') {
  1128. img++;
  1129. length--;
  1130. }
  1131. memcpy(body+i,"sssp:",5);
  1132. memcpy(body+i+5,img,length);
  1133. i += length + 5;
  1134. }
  1135. else i += ptr-img;
  1136. ptr += 2;
  1137. }
  1138. else if(!bbspink && !strncmp(ptr,"<br>",4)) {
  1139. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  1140. memcpy(body+i," <br>",5);
  1141. i += 5;
  1142. }
  1143. else {
  1144. memcpy(body+i,"<br>",4);
  1145. i += 4;
  1146. }
  1147. ptr += 4;
  1148. }
  1149. else body[i++] = *ptr++;
  1150. }
  1151. else if(!bbspink && *ptr == ' ') {
  1152. if(*(ptr+1) == ' ') ptr++;
  1153. else body[i++] = *ptr++;
  1154. }
  1155. else body[i++] = *ptr++;
  1156. }
  1157. resData->appendBytes(body ,i);
  1158. resData->appendBytes("<>", 2);
  1159. if(res == 1) resData->appendBytes(title ,strlen(title));
  1160. resData->appendBytes("\n" ,1);
  1161. if(useCache && res == startResNum) {
  1162. PBBS2chProxyThreadInfo info;
  1163. bool hit = false;
  1164. pthread_mutex_lock(mutex);
  1165. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  1166. if(it != threadCache->end()) {
  1167. info = it->second;
  1168. threadCache->erase(it);
  1169. }
  1170. pthread_mutex_unlock(mutex);
  1171. if(info) {
  1172. log_printf(5,"cache hit");
  1173. if(info->cachedData->length == resData->length) {
  1174. log_printf(5,"... size match");
  1175. if(!memcmp(info->cachedData->bytes,resData->bytes,resData->length)) {
  1176. log_printf(5,"... content match");
  1177. hit = true;
  1178. cachedSize = info->cachedSize - resData->length;
  1179. }
  1180. }
  1181. log_printf(5,"\n");
  1182. }
  1183. if(!hit) {
  1184. delete resData;
  1185. free(buffer);
  1186. return NULL;
  1187. }
  1188. }
  1189. txt->appendBytes(resData->bytes, resData->length);
  1190. res++;
  1191. while(*ptr == '\n' || *ptr == '\r') ptr++;
  1192. snprintf(signature,32,"<dt>%d ",res);
  1193. if(!memmem_priv(ptr, end-ptr+1, signature, strlen(signature))) {
  1194. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1195. info->lastResNum = res-1;
  1196. info->cachedSize = txt->length+cachedSize;
  1197. info->cachedData = resData;
  1198. pthread_mutex_lock(mutex);
  1199. threadCache->insert(std::make_pair(threadKey,info));
  1200. pthread_mutex_unlock(mutex);
  1201. log_printf(5,"cached thread %s (%ld bytes)\n",threadKey.c_str(),(long)resData->length);
  1202. if(lastModified) {
  1203. *lastModified = 0;
  1204. char formattedDate[256];
  1205. char *ptr;
  1206. ptr = date;
  1207. int year = strtol(ptr,&ptr,10);
  1208. if(*ptr != '/') break;
  1209. ptr++;
  1210. int month = strtol(ptr,&ptr,10);
  1211. if(*ptr != '/') break;
  1212. ptr++;
  1213. int day = strtol(ptr,&ptr,10);
  1214. if(!*ptr) break;
  1215. while(*ptr != ' ' && *ptr != 0) ptr++;
  1216. if(!*ptr) break;
  1217. ptr++;
  1218. int hour = strtol(ptr,&ptr,10);
  1219. if(*ptr != ':') break;
  1220. ptr++;
  1221. int minutes = strtol(ptr,&ptr,10);
  1222. if(*ptr != ':') break;
  1223. ptr++;
  1224. int seconds = strtol(ptr,&ptr,10);
  1225. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  1226. if(year < 100) year += 2000;
  1227. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  1228. //fprintf(stderr,"%s\n",formattedDate);
  1229. struct tm time = {};
  1230. strptime(formattedDate,threadTimestampFmt,&time);
  1231. *lastModified = mktime(&time);
  1232. //gmtime_r(lastModified,&time);
  1233. //strftime(formattedDate,256,httpTimestampFmt,&time);
  1234. //fprintf(stderr,"%s\n",formattedDate);
  1235. }
  1236. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  1237. break;
  1238. }
  1239. delete resData;
  1240. }
  1241. free(buffer);
  1242. return txt;
  1243. }
  1244. DataStorage *BBS2chProxyConnection::html2dat(DataStorage *html, int startResNum, time_t *lastModified, bool useCache)
  1245. {
  1246. char *ptr = html->bytes;
  1247. char *end = html->bytes + html->length - 1;
  1248. DataStorage *txt = new DataStorage();
  1249. int res = startResNum, i=0;
  1250. char signature[64];
  1251. char title[1024];
  1252. int cachedSize = 0;
  1253. char signatureTag[32];
  1254. char closeTag[32];
  1255. int closeTagLen;
  1256. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<h1 class=\"title\">", 18);
  1257. if(!ptr) {
  1258. delete txt;
  1259. return html2dat_old(html, startResNum, lastModified, useCache);
  1260. }
  1261. else {
  1262. char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, " class=\"post\"", 13);
  1263. if(ptr2) {
  1264. char *tmp = ptr2;
  1265. *ptr2 = 0;
  1266. while(*ptr2 != '<') ptr2--;
  1267. strcpy(signatureTag, ptr2);
  1268. *tmp = ' ';
  1269. }
  1270. else {
  1271. delete txt;
  1272. return NULL;
  1273. }
  1274. /*char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, "<dl class=\"post\"", 16);
  1275. if(ptr2) {
  1276. delete txt;
  1277. return html2dat_pink(html, startResNum, lastModified, useCache);
  1278. }*/
  1279. }
  1280. ptr += 18;
  1281. while(1) {
  1282. if(*ptr == '<') {
  1283. if(!strncasecmp(ptr,"</h1>",5)) {
  1284. ptr += 5;
  1285. break;
  1286. }
  1287. else title[i++] = *ptr++;
  1288. }
  1289. else if(*ptr == '\n') break;
  1290. else title[i++] = *ptr++;
  1291. }
  1292. title[i] = 0;
  1293. snprintf(signature,32,"%s class=\"post\" id=\"%d\"",signatureTag,res);
  1294. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  1295. if(!ptr) {
  1296. delete txt;
  1297. return NULL;
  1298. }
  1299. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  1300. if(!buffer) {
  1301. delete txt;
  1302. return NULL;
  1303. }
  1304. unsigned char *body = buffer;
  1305. char *mail = (char *)body + 65536;
  1306. char *name = mail + 1024;
  1307. char *date = name + 1024;
  1308. char *encrypted = date + 1024;
  1309. while(ptr < end) {
  1310. //fprintf(stderr,"%s\n",signature);
  1311. DataStorage *resData = new DataStorage();
  1312. i=0;
  1313. mail[0] = 0;
  1314. ptr = strstr(ptr," class=\"name\"><b>");
  1315. if(ptr) {
  1316. char *tmp = ptr;
  1317. *ptr = 0;
  1318. while(*ptr != '<') ptr--;
  1319. snprintf(closeTag,32,"</%s>",ptr+1);
  1320. closeTagLen = strlen(closeTag);
  1321. ptr = tmp + 17;
  1322. }
  1323. else {
  1324. delete resData;
  1325. break;
  1326. }
  1327. char endStr[64];
  1328. if(!strncmp(ptr,"<a href=\"mailto:",16)) {
  1329. replay:
  1330. // has mail
  1331. while(*ptr != '"') ptr++;
  1332. ptr++;
  1333. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  1334. ptr += 28;
  1335. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  1336. encrypted[i] = 0;
  1337. i = decryptMail((unsigned char *)mail,encrypted);
  1338. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  1339. ptr -= reconstruct_len;
  1340. char *start = ptr;
  1341. memcpy(ptr, "<a href=\"mailto:", 16);
  1342. ptr += 16;
  1343. memcpy(ptr, mail, i);
  1344. ptr = start;
  1345. i=0;
  1346. goto replay;
  1347. }
  1348. else {
  1349. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  1350. while(1) {
  1351. if(*ptr == '<' && !strncmp(ptr,"<a href=\"",9)) {
  1352. ptr = strchr(ptr,'>');
  1353. ptr++;
  1354. char *link = ptr;
  1355. ptr = strstr(link,"</a>");
  1356. memcpy(mail+i,link,ptr-link);
  1357. i += ptr-link;
  1358. ptr += 4;
  1359. }
  1360. else if(*ptr == '"') break;
  1361. else mail[i++] = *ptr++;
  1362. }
  1363. //while(*ptr != '"') mail[i++] = *ptr++;
  1364. mail[i] = 0;
  1365. }
  1366. snprintf(endStr,64,"</a></b>%s",closeTag);
  1367. while(*ptr != '>') ptr++;
  1368. ptr++;
  1369. }
  1370. /* we do not have to handle this special case because read.cgi on bbspink doesn't
  1371. emit font tags anymore and it conflicts with text decorations using "melon point" */
  1372. /*else if(!strncmp(ptr,"<font",5)) {
  1373. snprintf(endStr,64,"</font></b>%s",closeTag);
  1374. while(*ptr != '>') ptr++;
  1375. ptr++;
  1376. }*/
  1377. else {
  1378. snprintf(endStr,64,"</b>%s",closeTag);
  1379. }
  1380. i=0;
  1381. while(1) {
  1382. if(*ptr == '<') {
  1383. if(!strncmp(ptr,endStr,strlen(endStr))) {
  1384. ptr += strlen(endStr);
  1385. break;
  1386. }
  1387. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  1388. int j=0;
  1389. ptr = strstr(ptr,"data-cfemail=\"");
  1390. ptr += 14;
  1391. while(*ptr != '"') encrypted[j++] = *ptr++;
  1392. encrypted[j] = 0;
  1393. j = decryptMail((unsigned char *)name+i,encrypted);
  1394. i += j;
  1395. ptr = strstr(ptr,"</script>");
  1396. ptr += 9;
  1397. }
  1398. else if(!strncmp(ptr,"<a href=\"",9)) {
  1399. ptr = strchr(ptr,'>');
  1400. ptr++;
  1401. char *link = ptr;
  1402. ptr = strstr(link,"</a>");
  1403. memcpy(name+i,link,ptr-link);
  1404. i += ptr-link;
  1405. ptr += 4;
  1406. }
  1407. else name[i++] = *ptr++;
  1408. }
  1409. else name[i++] = *ptr++;
  1410. }
  1411. resData->appendBytes(name, i);
  1412. resData->appendBytes("<>", 2);
  1413. if(mail[0]) resData->appendBytes(mail ,strlen(mail));
  1414. resData->appendBytes("<>", 2);
  1415. ptr = strstr(ptr," class=\"date\">");
  1416. if(ptr) {
  1417. char *tmp = ptr;
  1418. *ptr = 0;
  1419. while(*ptr != '<') ptr--;
  1420. snprintf(closeTag,32,"</%s>",ptr+1);
  1421. closeTagLen = strlen(closeTag);
  1422. ptr = tmp + 14;
  1423. }
  1424. else {
  1425. delete resData;
  1426. break;
  1427. }
  1428. i=0;
  1429. while(1) {
  1430. if(*ptr == '<') {
  1431. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1432. ptr += closeTagLen;
  1433. break;
  1434. }
  1435. else date[i++] = *ptr++;
  1436. }
  1437. else date[i++] = *ptr++;
  1438. }
  1439. if(!strncmp(ptr,"<div class=\"uid",15) || !strncmp(ptr,"<span class=\"uid",16)) {
  1440. char *tmp = ptr+1;
  1441. while(*ptr != ' ') ptr++;
  1442. *ptr = 0;
  1443. snprintf(closeTag,32,"</%s>",tmp);
  1444. closeTagLen = strlen(closeTag);
  1445. ptr += 11;
  1446. while(*ptr != '>') ptr++;
  1447. ptr++;
  1448. date[i++] = ' ';
  1449. while(1) {
  1450. if(*ptr == '<') {
  1451. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1452. ptr += closeTagLen;
  1453. break;
  1454. }
  1455. else date[i++] = *ptr++;
  1456. }
  1457. else date[i++] = *ptr++;
  1458. }
  1459. }
  1460. if(!strncmp(ptr,"<div class=\"be",14) || !strncmp(ptr,"<span class=\"be",15)) {
  1461. ptr += 14;
  1462. while(*ptr != '>') ptr++;
  1463. ptr++;
  1464. if(!strncmp(ptr,"<a href=\"",9)) {
  1465. ptr += 9;
  1466. while(*ptr != '/' && *ptr != '"') ptr++;
  1467. if(*ptr == '/' && (!strncmp(ptr,"//be.2ch.net/user/",18) || !strncmp(ptr,"//be.5ch.net/user/",18))) {
  1468. memcpy(date+i," BE:",4);
  1469. i += 4;
  1470. ptr += 18;
  1471. while(*ptr != '"') date[i++] = *ptr++;
  1472. date[i++] = '-';
  1473. ptr = strchr(ptr,'?');
  1474. ptr++;
  1475. char *tmp = strstr(ptr,"</a>");
  1476. memcpy(date+i,ptr,tmp-ptr);
  1477. i += tmp-ptr;
  1478. ptr = tmp + 4;
  1479. }
  1480. }
  1481. }
  1482. resData->appendBytes(date ,i);
  1483. resData->appendBytes("<>", 2);
  1484. if(!strcmp(signatureTag,"<div")) {
  1485. ptr = strstr(ptr,"<div class=\"message\">");
  1486. if(!ptr) {
  1487. delete resData;
  1488. break;
  1489. }
  1490. else {
  1491. ptr += 21;
  1492. if(!strncasecmp(ptr,"<span class=\"escaped\">",22)) {
  1493. if(!strncasecmp(ptr+22,"<span class=\"AA\">",17)) {
  1494. strcpy(closeTag,"</span></span></div>");
  1495. closeTagLen = 20;
  1496. ptr += 22+17;
  1497. }
  1498. else {
  1499. strcpy(closeTag,"</span></div>");
  1500. closeTagLen = 13;
  1501. ptr += 22;
  1502. }
  1503. }
  1504. else {
  1505. strcpy(closeTag,"</div>");
  1506. closeTagLen = 6;
  1507. }
  1508. }
  1509. }
  1510. else {
  1511. ptr = strstr(ptr,"<dd class=\"thread_in\">");
  1512. if(!ptr) {
  1513. delete resData;
  1514. break;
  1515. }
  1516. strcpy(closeTag,"</dd>");
  1517. closeTagLen = 5;
  1518. ptr += 22;
  1519. }
  1520. i=0;
  1521. while(1) {
  1522. if(*ptr == '<') {
  1523. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1524. ptr += closeTagLen;
  1525. break;
  1526. }
  1527. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  1528. int j=0;
  1529. ptr = strstr(ptr,"data-cfemail=\"");
  1530. ptr += 14;
  1531. while(*ptr != '"') encrypted[j++] = *ptr++;
  1532. encrypted[j] = 0;
  1533. j = decryptMail(body+i,encrypted);
  1534. i += j;
  1535. ptr = strstr(ptr,"</script>");
  1536. ptr += 9;
  1537. }
  1538. else if(!strncmp(ptr,"<a ",3)) {
  1539. char *tmp = strchr(ptr,'>');
  1540. char *href = (char *)memmem_priv(ptr,tmp-ptr,"href=\"",6);
  1541. char *link = tmp+1;
  1542. if(href && !strncmp(link,"&gt;&gt;",8) && memmem_priv(href,link-href,"test/read.cgi/",14)) {
  1543. while(ptr < link) {
  1544. if(!strncmp(ptr," class=\"",8)) {
  1545. ptr += 8;
  1546. while(*ptr != '"' && *ptr != '>') ptr++;
  1547. if(*ptr == '"') ptr++;
  1548. }
  1549. else body[i++] = *ptr++;
  1550. }
  1551. }
  1552. else {
  1553. ptr = strstr(link,"</a>");
  1554. memcpy(body+i,link,ptr-link);
  1555. i += ptr-link;
  1556. ptr += 4;
  1557. }
  1558. }
  1559. else if(!strncmp(ptr,"<img src=\"",10)) {
  1560. ptr += 10;
  1561. char *img = ptr;
  1562. ptr = strstr(img,"\">");
  1563. memcpy(body+i,img,ptr-img);
  1564. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  1565. int length = ptr-img;
  1566. while(*img != '/') {
  1567. img++;
  1568. length--;
  1569. }
  1570. memcpy(body+i,"sssp:",5);
  1571. memcpy(body+i+5,img,length);
  1572. i += length + 5;
  1573. }
  1574. else i += ptr-img;
  1575. ptr += 2;
  1576. }
  1577. else if(!strncmp(ptr,"<br>",4)) {
  1578. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  1579. memcpy(body+i," <br>",5);
  1580. i += 5;
  1581. }
  1582. else {
  1583. memcpy(body+i,"<br>",4);
  1584. i += 4;
  1585. }
  1586. ptr += 4;
  1587. }
  1588. else body[i++] = *ptr++;
  1589. }
  1590. else body[i++] = *ptr++;
  1591. }
  1592. resData->appendBytes(body ,i);
  1593. resData->appendBytes("<>", 2);
  1594. if(res == 1) resData->appendBytes(title ,strlen(title));
  1595. resData->appendBytes("\n" ,1);
  1596. if(useCache && res == startResNum) {
  1597. PBBS2chProxyThreadInfo info;
  1598. bool hit = false;
  1599. pthread_mutex_lock(mutex);
  1600. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  1601. if(it != threadCache->end()) {
  1602. info = it->second;
  1603. threadCache->erase(it);
  1604. }
  1605. pthread_mutex_unlock(mutex);
  1606. if(info) {
  1607. log_printf(5,"cache hit");
  1608. if(info->cachedData->length == resData->length) {
  1609. log_printf(5,"... size match");
  1610. if(!memcmp(info->cachedData->bytes,resData->bytes,resData->length)) {
  1611. log_printf(5,"... content match");
  1612. hit = true;
  1613. cachedSize = info->cachedSize - resData->length;
  1614. }
  1615. }
  1616. log_printf(5,"\n");
  1617. }
  1618. if(!hit) {
  1619. delete resData;
  1620. free(buffer);
  1621. return NULL;
  1622. }
  1623. }
  1624. txt->appendBytes(resData->bytes, resData->length);
  1625. res++;
  1626. while(*ptr == '\n' || *ptr == '\r') ptr++;
  1627. snprintf(signature,64,"%s class=\"post\" id=\"",signatureTag);
  1628. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  1629. if(ptr) {
  1630. int next = atoi(ptr+strlen(signature));
  1631. if(next >= res) {
  1632. while(next > res) {
  1633. txt->appendBytes("broken<><>broken<> broken <>\n", 29);
  1634. res++;
  1635. }
  1636. }
  1637. else ptr = NULL;
  1638. }
  1639. if(!ptr) {
  1640. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1641. info->lastResNum = res-1;
  1642. info->cachedSize = txt->length+cachedSize;
  1643. info->cachedData = resData;
  1644. pthread_mutex_lock(mutex);
  1645. threadCache->insert(std::make_pair(threadKey,info));
  1646. pthread_mutex_unlock(mutex);
  1647. log_printf(5,"cached thread %s (%ld bytes)\n",threadKey.c_str(),(long)resData->length);
  1648. if(lastModified) {
  1649. *lastModified = 0;
  1650. char formattedDate[256];
  1651. char *ptr;
  1652. ptr = date;
  1653. int year = strtol(ptr,&ptr,10);
  1654. if(*ptr != '/') break;
  1655. ptr++;
  1656. int month = strtol(ptr,&ptr,10);
  1657. if(*ptr != '/') break;
  1658. ptr++;
  1659. int day = strtol(ptr,&ptr,10);
  1660. if(!*ptr) break;
  1661. while(*ptr != ' ' && *ptr != 0) ptr++;
  1662. if(!*ptr) break;
  1663. ptr++;
  1664. int hour = strtol(ptr,&ptr,10);
  1665. if(*ptr != ':') break;
  1666. ptr++;
  1667. int minutes = strtol(ptr,&ptr,10);
  1668. if(*ptr != ':') break;
  1669. ptr++;
  1670. int seconds = strtol(ptr,&ptr,10);
  1671. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  1672. if(year < 100) year += 2000;
  1673. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  1674. //fprintf(stderr,"%s\n",formattedDate);
  1675. struct tm time = {};
  1676. strptime(formattedDate,threadTimestampFmt,&time);
  1677. *lastModified = mktime(&time);
  1678. //gmtime_r(lastModified,&time);
  1679. //strftime(formattedDate,256,httpTimestampFmt,&time);
  1680. //fprintf(stderr,"%s\n",formattedDate);
  1681. }
  1682. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  1683. break;
  1684. }
  1685. delete resData;
  1686. }
  1687. free(buffer);
  1688. return txt;
  1689. }
  1690. int BBS2chProxyConnection::datProxyAPI(const char *url, const char *method, BBS2chProxyHttpHeaders &requestHeaders)
  1691. {
  1692. long statusCode = 0;
  1693. const std::string &postBody = auth->requestBodyForURL(url, curl);
  1694. bool directMode = false;
  1695. if (postBody.empty()) {
  1696. sendResponse(401, "Unauthorized", socketToClient);
  1697. return 401;
  1698. }
  1699. /* just read and strip off post body */
  1700. if (!strcasecmp(method, "POST")) {
  1701. char *postdata = NULL;
  1702. if (isClientChunked) {
  1703. readChunkedBodyIntoBuffer(&postdata, socketToClient);
  1704. }
  1705. else if (content_length) {
  1706. postdata = (char *)calloc(content_length+1, 1);
  1707. socketToClient->read(postdata, content_length);
  1708. }
  1709. if (postdata && strstr(postdata, "sid=")) directMode = true;
  1710. if (postdata) free(postdata);
  1711. }
  1712. if (curl) {
  1713. CURLcode res;
  1714. struct curl_slist *headersForCurl = NULL;
  1715. DataStorage receivedHeader;
  1716. DataStorage receivedBody;
  1717. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl, "Range");
  1718. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl, "If-Modified-Since");
  1719. headersForCurl = requestHeaders.appendToCurlSlist(headersForCurl, "Accept-Encoding");
  1720. if (x_2ch_ua_dat) headersForCurl = curl_slist_append(headersForCurl, x_2ch_ua_dat);
  1721. if (curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  1722. curl_easy_setopt(curl, CURLOPT_URL, url);
  1723. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headersForCurl);
  1724. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  1725. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  1726. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  1727. curl_easy_setopt(curl, CURLOPT_WRITEDATA, &receivedBody);
  1728. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_download);
  1729. curl_easy_setopt(curl, CURLOPT_HEADERDATA, &receivedHeader);
  1730. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  1731. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  1732. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  1733. if (force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  1734. if (proxy_server) {
  1735. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  1736. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  1737. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  1738. }
  1739. if (api_ua_dat) {
  1740. curl_easy_setopt(curl, CURLOPT_USERAGENT, api_ua_dat);
  1741. }
  1742. else {
  1743. if (user_agent && !strncmp(user_agent, "Monazilla/", strlen("Monazilla/")))
  1744. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  1745. else
  1746. curl_easy_setopt(curl, CURLOPT_USERAGENT, "");
  1747. }
  1748. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  1749. #if LIBCURL_VERSION_NUM >= 0x071101
  1750. curl_easy_setopt(curl, CURLOPT_COPYPOSTFIELDS, postBody.c_str());
  1751. #else
  1752. curl_easy_setopt(curl, CURLOPT_POSTFIELDS, postBody.c_str());
  1753. #endif
  1754. //return;
  1755. res = curl_easy_perform(curl);
  1756. if (res == CURLE_OK) {
  1757. curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode);
  1758. receivedHeader.appendBytes("", 1);
  1759. const char *ptr = receivedHeader.bytes;
  1760. /* this is necessary because the raw header may contain chunk trailers after real headers */
  1761. const char *end = strstr(receivedHeader.bytes, "\r\n\r\n");
  1762. int threadStatus = 0;
  1763. if (end && !directMode) {
  1764. BBS2chProxyHttpHeaders headers;
  1765. while (ptr < end) {
  1766. const char *lineEnd = strchr(ptr, '\n');
  1767. if (!lineEnd) break;
  1768. headers.add(ptr, lineEnd-ptr);
  1769. ptr = lineEnd + 1;
  1770. }
  1771. if (headers.has("Thread-Status")) {
  1772. threadStatus = atoi(headers.get("Thread-Status").c_str());
  1773. }
  1774. }
  1775. if (threadStatus == 1 || (directMode && end)) {
  1776. if (end+4-receivedHeader.bytes > socketToClient->write(receivedHeader.bytes, end+4-receivedHeader.bytes)) goto last;
  1777. if (receivedBody.length > socketToClient->write(receivedBody.bytes, receivedBody.length)) goto last;
  1778. goto last;
  1779. }
  1780. else if (threadStatus == 8) {
  1781. sendBasicHeaders(302, "Found", socketToClient);
  1782. if (0 >= socketToClient->writeString("Location: http://www2.2ch.net/live.html\r\n")) goto last;
  1783. if (0 >= socketToClient->writeString("\r\n")) goto last;
  1784. statusCode = 302;
  1785. goto last;
  1786. }
  1787. else {
  1788. if (statusCode < 400) {
  1789. sendResponse(401, "Unauthorized", socketToClient);
  1790. statusCode = 401;
  1791. }
  1792. else {
  1793. sendResponse(503, "Service Unavailable", socketToClient);
  1794. statusCode = 503;
  1795. }
  1796. receivedBody.appendBytes("",1);
  1797. if (!strncasecmp(receivedBody.bytes,"ng (",4)) {
  1798. log_printf(0, "API gateway returned error: %s\n", receivedBody.bytes);
  1799. }
  1800. }
  1801. //fprintf(stderr,"%ld\n",statusCode);
  1802. }
  1803. else {
  1804. log_printf(0, "curl error: %s\n", curl_easy_strerror(res));
  1805. sendResponse(503, "Service Unavailable", socketToClient);
  1806. statusCode = 503;
  1807. }
  1808. last:
  1809. curl_easy_reset(curl);
  1810. curl_slist_free_all(headersForCurl);
  1811. }
  1812. return statusCode;
  1813. }
  1814. int BBS2chProxyConnection::bbsmenuProxy(const char *url, const char *method, BBS2chProxyHttpHeaders &requestHeaders)
  1815. {
  1816. long statusCode = 0;
  1817. DataStorage *dat = new DataStorage();
  1818. DataStorage *outHTML = new DataStorage();
  1819. if(curl) {
  1820. CURLcode res;
  1821. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  1822. curl_easy_setopt(curl, CURLOPT_URL, url);
  1823. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  1824. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  1825. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  1826. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  1827. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  1828. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  1829. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  1830. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  1831. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  1832. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  1833. if(proxy_server) {
  1834. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  1835. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  1836. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  1837. }
  1838. if(user_agent) {
  1839. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  1840. }
  1841. else if(requestHeaders.has("User-Agent")) {
  1842. curl_easy_setopt(curl, CURLOPT_USERAGENT, requestHeaders.get("User-Agent").c_str());
  1843. }
  1844. res = curl_easy_perform(curl);
  1845. if(res == CURLE_OK) {
  1846. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  1847. if(statusCode == 200 && dat->length) {
  1848. dat->appendBytes("",1);
  1849. dat->length--;
  1850. char *ptr = dat->bytes;
  1851. while(*ptr) {
  1852. if(!strncasecmp(ptr,"<a href=",8)) {
  1853. char *start = ptr;
  1854. char *end = strchr(ptr+8,'>');
  1855. ptr = strstr(ptr+8,"://");
  1856. if(ptr && ptr < end) {
  1857. char *protocol = ptr;
  1858. char *end2 = strchr(ptr+3,'/');
  1859. ptr = strstr(protocol+3,"5ch.net");
  1860. if(ptr && ptr < end2 && (*(ptr-1)=='.' || *(ptr-1)=='/')) {
  1861. memcpy(ptr,"2ch",3);
  1862. if(*(protocol-1) == 's') outHTML->appendBytes(start, protocol-start-1);
  1863. else outHTML->appendBytes(start, protocol-start);
  1864. outHTML->appendBytes(protocol, end-protocol);
  1865. ptr = end;
  1866. continue;
  1867. }
  1868. ptr = strstr(protocol+3,"bbspink.com");
  1869. if(ptr && ptr < end2 && (*(ptr-1)=='.' || *(ptr-1)=='/')) {
  1870. if(*(protocol-1) == 's') outHTML->appendBytes(start, protocol-start-1);
  1871. else outHTML->appendBytes(start, protocol-start);
  1872. outHTML->appendBytes(protocol, end-protocol);
  1873. ptr = end;
  1874. continue;
  1875. }
  1876. }
  1877. ptr = start;
  1878. }
  1879. outHTML->appendBytes(ptr++, 1);
  1880. }
  1881. }
  1882. }
  1883. else {
  1884. log_printf(0,"curl error: %s (%s)\n", curl_easy_strerror(res), url);
  1885. statusCode = 503;
  1886. }
  1887. }
  1888. if(statusCode == 200) {
  1889. std::ostringstream ss;
  1890. ss << "Content-Length: " << outHTML->length << "\r\n";
  1891. sendBasicHeaders(statusCode,"OK",socketToClient);
  1892. if(0 >= socketToClient->writeString("Content-Type: text/html\r\n")) goto last;
  1893. if(0 >= socketToClient->writeString(ss.str())) goto last;
  1894. if(0 >= socketToClient->writeString("\r\n")) goto last;
  1895. if(strcasecmp(method, "HEAD")) {
  1896. if(outHTML->length > socketToClient->write(outHTML->bytes, outHTML->length)) goto last;
  1897. }
  1898. }
  1899. else {
  1900. sendResponse(503, "Service Unavailable", socketToClient);
  1901. statusCode = 503;
  1902. }
  1903. last:
  1904. if(curl) curl_easy_reset(curl);
  1905. if(dat) delete dat;
  1906. if(outHTML) delete outHTML;
  1907. return statusCode;
  1908. }
  1909. int BBS2chProxyConnection::bbsCgiProxy(const char *url, BBS2chProxyHttpHeaders &requestHeaders, const char *requestBody)
  1910. {
  1911. long statusCode = 0;
  1912. std::string hostStr = requestHeaders.get("Host");
  1913. std::string boardStr;
  1914. std::string threadStr;
  1915. requestHeaders.remove("Host");
  1916. if (user_agent) requestHeaders.set("User-Agent", user_agent);
  1917. if (requestBody && (lua_script || !bbscgi_headers.empty() || !bbscgi_postorder.empty())) {
  1918. std::map<std::string, std::string> fields;
  1919. const char *ptr = requestBody;
  1920. size_t bodyLength = 0;
  1921. while (1) {
  1922. const char *tmp = ptr;
  1923. while (*tmp != '=' && *tmp != 0) tmp++;
  1924. if (*tmp == 0) {
  1925. bodyLength = tmp - requestBody;
  1926. break;
  1927. }
  1928. std::string key(ptr, tmp-ptr);
  1929. tmp++;
  1930. ptr = tmp;
  1931. while (*tmp != '&' && *tmp != 0) tmp++;
  1932. std::string value(ptr, tmp-ptr);
  1933. fields.insert(std::make_pair(key, value));
  1934. if (*tmp == 0) {
  1935. bodyLength = tmp - requestBody;
  1936. break;
  1937. }
  1938. ptr = tmp + 1;
  1939. }
  1940. if (fields.count("bbs")) boardStr = fields["bbs"];
  1941. if (fields.count("key")) threadStr = fields["key"];
  1942. if (!bbscgi_postorder.empty()) {
  1943. std::string newBody;
  1944. for (std::vector<std::string>::iterator it = bbscgi_postorder.begin(); it != bbscgi_postorder.end(); it++) {
  1945. std::string &name = *it;
  1946. if (fields.count(name)) {
  1947. if (!newBody.empty()) newBody.append("&");
  1948. newBody.append(name);
  1949. newBody.append("=");
  1950. newBody.append(fields[name]);
  1951. fields.erase(name);
  1952. }
  1953. }
  1954. for (std::map<std::string, std::string>::iterator it = fields.begin(); it != fields.end(); it++) {
  1955. if (!newBody.empty()) newBody.append("&");
  1956. newBody.append(it->first);
  1957. newBody.append("=");
  1958. newBody.append(it->second);
  1959. }
  1960. if (bodyLength == newBody.length()) {
  1961. strcpy((char *)requestBody, newBody.c_str());
  1962. log_printf(1, "Reordered request body is: %s\n", requestBody);
  1963. }
  1964. else {
  1965. log_printf(0, "Error occured while reordering the request body - skipping\n");
  1966. }
  1967. }
  1968. }
  1969. if (!bbscgi_headers.empty()) {
  1970. for (std::map<std::string, std::string>::iterator it = bbscgi_headers.begin(); it!=bbscgi_headers.end(); it++) {
  1971. /* we cannot use a reference here, because the original string shouldn't be replaced */
  1972. std::string value = it->second;
  1973. if (!hostStr.empty()) {
  1974. std::string::size_type pos = value.find("%HOST%");
  1975. while (pos != std::string::npos) {
  1976. value.replace(pos, 6, hostStr);
  1977. pos = value.find("%HOST%", pos+hostStr.length());
  1978. }
  1979. }
  1980. if (!boardStr.empty()) {
  1981. std::string::size_type pos = value.find("%BOARD%");
  1982. while (pos != std::string::npos) {
  1983. value.replace(pos, 7, boardStr);
  1984. pos = value.find("%BOARD%", pos+boardStr.length());
  1985. }
  1986. }
  1987. if (!threadStr.empty()) {
  1988. std::string::size_type pos = value.find("%THREAD%");
  1989. while (pos != std::string::npos) {
  1990. value.replace(pos, 8, threadStr);
  1991. pos = value.find("%THREAD%", pos+threadStr.length());
  1992. }
  1993. }
  1994. requestHeaders.set(it->first, value);
  1995. log_printf(1, "Appended custom header \"%s: %s\"\n", it->first.c_str(), value.c_str());
  1996. }
  1997. }
  1998. for (int run=0; run<2; run++) {
  1999. BBS2chProxyHttpHeaders *_headers = new BBS2chProxyHttpHeaders(requestHeaders);
  2000. curl_slist *headersForCurl = NULL;
  2001. char *_body = (char *)requestBody;
  2002. status = 0;
  2003. monaKeyForRequest = "";
  2004. #ifdef USE_LUA
  2005. if (lua_script) {
  2006. lua_State* l = luaL_newstate();
  2007. luaL_openlibs(l);
  2008. if (luaL_loadfile(l, lua_script) != LUA_OK) {
  2009. log_printf(0, "Lua: Failed to open script %s:\n %s\n", lua_script, lua_tostring(l, -1));
  2010. goto lua_end;
  2011. }
  2012. lua_newtable(l);
  2013. lua_pushcfunction(l, lua_hmacSHA256);
  2014. lua_setfield(l, -2, "hmacSHA256");
  2015. lua_pushcfunction(l, lua_decodeURIComponent);
  2016. lua_setfield(l, -2, "decodeURIComponent");
  2017. lua_pushcfunction(l, lua_encodeURIComponent);
  2018. lua_setfield(l, -2, "encodeURIComponent");
  2019. lua_pushcfunction(l, lua_convertShiftJISToUTF8);
  2020. lua_setfield(l, -2, "convertShiftJISToUTF8");
  2021. lua_pushcfunction(l, lua_isExpiredKey);
  2022. lua_setfield(l, -2, "isExpiredKey");
  2023. lua_pushcfunction(l, lua_isValidAsUTF8);
  2024. lua_setfield(l, -2, "isValidAsUTF8");
  2025. lua_pushstring(l, getMonaKey().c_str());
  2026. lua_setfield(l, -2, "monaKey");
  2027. lua_setglobal(l, "proxy2ch");
  2028. BBS2chProxyHttpHeaders::getClassDefinitionForLua(l);
  2029. lua_setglobal(l, "HttpHeaders");
  2030. if (lua_pcall(l, 0, 0, 0) != LUA_OK) {
  2031. log_printf(0, "Lua: Failed to run script %s:\n %s\n", lua_script, lua_tostring(l, -1));
  2032. goto lua_end;
  2033. }
  2034. lua_getglobal(l, "willSendRequestToBbsCgi");
  2035. if (!lua_isfunction(l, -1)) {
  2036. log_printf(0, "Lua: willSendRequestToBbsCgi function does not exist in the script\n");
  2037. goto lua_end;
  2038. }
  2039. lua_newtable(l);
  2040. _headers->getUserdataForLua(l);
  2041. lua_setfield(l, -2, "headers");
  2042. lua_pushstring(l, _body);
  2043. lua_setfield(l, -2, "body");
  2044. lua_pushstring(l, hostStr.c_str());
  2045. lua_pushstring(l, boardStr.c_str());
  2046. lua_pushstring(l, threadStr.c_str());
  2047. if (lua_pcall(l, 4, 1, 0) != LUA_OK) {
  2048. log_printf(0, "Lua: Failed to call willSendRequestToBbsCgi function:\n %s\n", lua_tostring(l, -1));
  2049. goto lua_end;
  2050. }
  2051. if (!lua_istable(l, -1)) {
  2052. log_printf(0, "Lua: A return type of willSendRequestToBbsCgi function should be a table\n");
  2053. goto lua_end;
  2054. }
  2055. lua_pushstring(l, "body");
  2056. lua_rawget(l, -2);
  2057. if (lua_isstring(l, -1)) {
  2058. const char *newBody = lua_tostring(l, -1);
  2059. _body = strdup(newBody);
  2060. log_printf(1, "Lua: Set request body \"%s\"\n", newBody);
  2061. }
  2062. lua_pop(l, 1);
  2063. lua_pushstring(l, "headers");
  2064. lua_rawget(l, -2);
  2065. if (lua_istable(l, -1)) {
  2066. delete _headers;
  2067. _headers = new BBS2chProxyHttpHeaders();
  2068. lua_pushnil(l);
  2069. while (lua_next(l, -2)) {
  2070. if (lua_isstring(l, -1) && lua_isstring(l, -2)) {
  2071. const char *name = lua_tostring(l, -2);
  2072. const char *value = lua_tostring(l, -1);
  2073. _headers->add(name, value);
  2074. log_printf(1, "Lua: Set request header \"%s: %s\"\n", name, value);
  2075. }
  2076. lua_pop(l, 1);
  2077. }
  2078. }
  2079. else if (lua_isuserdata(l, -1)) {
  2080. if (lua_getmetatable(l, -1)) {
  2081. if (lua_getfield(l, -1, "_type") == LUA_TSTRING) {
  2082. if (!strcmp(lua_tostring(l, -1), "HttpHeaders")) {
  2083. BBS2chProxyHttpHeaders *newHeaders = *((BBS2chProxyHttpHeaders **)lua_touserdata(l, -3));
  2084. if (newHeaders != _headers) {
  2085. /* remove metatable to prevent the object from garbage collected by lua */
  2086. lua_newtable(l);
  2087. lua_setmetatable(l, -4);
  2088. delete _headers;
  2089. _headers = newHeaders;
  2090. }
  2091. for (std::map<std::string, PBBS2chProxyHttpHeaderEntry>::iterator it = _headers->getMap().begin(); it != _headers->getMap().end(); it++) {
  2092. log_printf(1, "Lua: Set request header \"%s\"\n", it->second->getFull().c_str());
  2093. }
  2094. }
  2095. }
  2096. lua_pop(l, 2);
  2097. }
  2098. }
  2099. lua_end:
  2100. lua_close(l);
  2101. }
  2102. #endif
  2103. do {
  2104. bool isPink = hostStr.find("bbspink.com") != std::string::npos;
  2105. bool shouldSign = appKey && (((api_mode & 2) && !isPink) || (api_mode & 4));
  2106. bool shouldConvertBodyToUTF8 = (bbscgi_utf8 == 1 && shouldSign) || (bbscgi_utf8 == 2);
  2107. std::string userAgent = _headers->get("User-Agent");
  2108. if (_headers->has("X-MonaKey")) {
  2109. monaKeyForRequest = _headers->get("X-MonaKey");
  2110. }
  2111. if (shouldConvertBodyToUTF8 && !_headers->has("X-PostSig")) {
  2112. std::string newBody = convertBodyToUTF8(_body);
  2113. if (!newBody.empty()) {
  2114. if (_body != requestBody) {
  2115. free(_body);
  2116. }
  2117. _body = strdup(newBody.c_str());
  2118. log_printf(1, "Converted request body to UTF-8: %s\n", _body);
  2119. }
  2120. else {
  2121. log_printf(1, "Request body seems already to be UTF-8, will be sent without conversion\n");
  2122. }
  2123. _headers->set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
  2124. log_printf(1, "Appended header \"Content-Type: application/x-www-form-urlencoded; charset=UTF-8\"\n");
  2125. }
  2126. if (shouldSign && (!lua_script || !_headers->has("X-PostSig"))) {
  2127. if (userAgent.empty() && user_agent) userAgent = user_agent;
  2128. if (!userAgent.empty()) {
  2129. monaKeyForRequest = getMonaKey();
  2130. appendPostSignature(_body, userAgent, monaKeyForRequest, _headers);
  2131. } else {
  2132. log_printf(0, "API: User-Agent muse be set explicitly to post with API.\n");
  2133. }
  2134. }
  2135. if (!monaKeyForRequest.empty() && monaKeyIssueTime.count(monaKeyForRequest)) {
  2136. double issued = monaKeyIssueTime[monaKeyForRequest];
  2137. double now = getCurrentTime();
  2138. double wait = 4.0 - (now - issued);
  2139. if (wait > 0) {
  2140. log_printf(1, "Sleeping for %.1f seconds to avoid posting too fast...\n", wait);
  2141. #ifdef _WIN32
  2142. Sleep(wait * 1e+3);
  2143. #else
  2144. usleep(wait * 1e+6);
  2145. #endif
  2146. }
  2147. monaKeyIssueTime.erase(monaKeyForRequest);
  2148. }
  2149. headersForCurl = _headers->appendToCurlSlist(headersForCurl);
  2150. if (!_headers->has("Expect")) headersForCurl = curl_slist_append(headersForCurl, "Expect:");
  2151. if (!_headers->has("Accept")) headersForCurl = curl_slist_append(headersForCurl, "Accept:");
  2152. } while (0);
  2153. if (curl) {
  2154. CURLcode res;
  2155. if (curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  2156. curl_easy_setopt(curl, CURLOPT_URL, url);
  2157. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  2158. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  2159. if (run == 0)
  2160. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_bbscgi);
  2161. else
  2162. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_proxy);
  2163. curl_easy_setopt(curl, CURLOPT_HEADERDATA, this);
  2164. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_proxy);
  2165. curl_easy_setopt(curl, CURLOPT_WRITEDATA, this);
  2166. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  2167. curl_easy_setopt(curl, CURLOPT_POSTFIELDS, _body);
  2168. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  2169. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  2170. //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
  2171. if (force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  2172. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  2173. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headersForCurl);
  2174. if (user_agent) {
  2175. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  2176. }
  2177. if (proxy_server) {
  2178. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  2179. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  2180. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  2181. }
  2182. res = curl_easy_perform(curl);
  2183. if (res != CURLE_OK) {
  2184. if (res == CURLE_WRITE_ERROR && status == 2) {
  2185. log_printf(1, "MonaKey should be reset. Sending the same request automatically...\n");
  2186. curl_easy_reset(curl);
  2187. curl_slist_free_all(headersForCurl);
  2188. delete _headers;
  2189. if (_body != requestBody) free(_body);
  2190. continue;
  2191. }
  2192. else {
  2193. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),url);
  2194. if (!status) sendResponse(503, "Service Unavailable", socketToClient);
  2195. statusCode = 503;
  2196. }
  2197. }
  2198. else {
  2199. if (isResponseChunked) {
  2200. socketToClient->writeString("0\r\n\r\n");
  2201. }
  2202. curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &statusCode);
  2203. }
  2204. curl_easy_reset(curl);
  2205. }
  2206. curl_slist_free_all(headersForCurl);
  2207. delete _headers;
  2208. if (_body != requestBody) free(_body);
  2209. break;
  2210. }
  2211. return statusCode;
  2212. }
  2213. void BBS2chProxyConnection::compileRegex(void)
  2214. {
  2215. static int compiled;
  2216. if (compiled) return;
  2217. regcomp(&regex, "^https?://([^:/.]+)\\.(2ch\\.net|5ch\\.net|bbspink\\.com)(:[0-9]+)?/([^/]+)/dat/([0-9]+)\\.dat", REG_EXTENDED|REG_ICASE);
  2218. regcomp(&regex_kako, "^https?://([^:/.]+)\\.(2ch\\.net|5ch\\.net|bbspink\\.com)(:[0-9]+)?/([^/]+)/kako/[0-9]+/([0-9]+/)?([0-9]+)\\.dat", REG_EXTENDED|REG_ICASE);
  2219. regcomp(&regex_offlaw, "^https?://([^:/.]+)\\.(2ch\\.net|5ch\\.net|bbspink\\.com)(:[0-9]+)?/test/offlaw2.so\\?.*bbs=([^&]+)", REG_EXTENDED|REG_ICASE);
  2220. regcomp(&regex_api, "^https?://api\\.[25]ch\\.net(:[0-9]+)?/v1/([^/]+)/([^/]+)/([0-9]+)", REG_EXTENDED|REG_ICASE);
  2221. regcomp(&regex_api_auth, "^https?://api\\.[25]ch\\.net(:[0-9]+)?/v1/auth/?$", REG_EXTENDED|REG_ICASE);
  2222. compiled = 1;
  2223. }