BBS2chProxyConnection.cpp 57 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031
  1. #include <pthread.h>
  2. #include <time.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <unistd.h>
  6. #include <curl/curl.h>
  7. #ifdef USE_LUA
  8. #include <lua.hpp>
  9. #endif
  10. #ifdef _WIN32
  11. #include <fcntl.h>
  12. #include <winsock2.h>
  13. #include <ws2tcpip.h>
  14. #include <mswsock.h>
  15. #define CLOSESOCKET(x) closesocket(x)
  16. #else
  17. #include <sys/socket.h>
  18. #include <netinet/in.h>
  19. #include <netdb.h>
  20. #include <arpa/inet.h>
  21. #define CLOSESOCKET(x) close(x)
  22. #endif
  23. #include "BBS2chProxyConnection.h"
  24. #include "DataStorage.h"
  25. #include "hmac.h"
  26. //#define DEBUG 1
  27. extern char *proxy_server;
  28. extern long proxy_port;
  29. extern long proxy_type;
  30. extern long timeout;
  31. extern char *user_agent;
  32. extern char *api_ua_dat;
  33. extern char *x_2ch_ua_dat;
  34. extern char *appKey;
  35. extern int allow_chunked;
  36. extern int curl_features;
  37. extern unsigned int curl_version_number;
  38. extern bool accept_https;
  39. extern int force_5chnet;
  40. extern int force_5chnet_https;
  41. extern int force_ipv4;
  42. extern char *bbsmenu_url;
  43. extern char *api_server;
  44. extern std::map<std::string, std::string> bbscgi_headers;
  45. extern int gikofix;
  46. extern CURLSH *curl_share;
  47. extern char *lua_script;
  48. extern void log_printf(int level, const char *format ...);
  49. #include "utils.h"
  50. #ifdef USE_LUA
  51. extern "C" {
  52. static int lua_hmacSHA256(lua_State *l)
  53. {
  54. static const char *table = "0123456789abcdef";
  55. size_t keyLength, dataLength;
  56. const char *key = luaL_checklstring(l, 1, &keyLength);
  57. const char *data = luaL_checklstring(l, 2, &dataLength);
  58. if (!key || !data) return 0;
  59. unsigned char digest[32];
  60. char digestStr[65];
  61. proxy2ch_HMAC_SHA256(key, keyLength, data, dataLength, digest);
  62. for (int i=0; i<32; i++) {
  63. unsigned char c = digest[i];
  64. unsigned char upper = (c >> 4) & 0xf;
  65. unsigned char lower = c & 0xf;
  66. digestStr[i*2] = table[upper];
  67. digestStr[i*2+1] = table[lower];
  68. }
  69. digestStr[64] = 0;
  70. lua_pushstring(l, digestStr);
  71. return 1;
  72. }
  73. static int lua_decodeURIComponent(lua_State *l)
  74. {
  75. size_t length;
  76. const char *input = luaL_checklstring(l, 1, &length);
  77. if (!input) return 0;
  78. bool decodePlus = true;
  79. if (!lua_isnoneornil(l, 2)) {
  80. decodePlus = (lua_toboolean(l, 2));
  81. }
  82. std::string output;
  83. for (int i=0;i<length;i++) {
  84. if (input[i] == '%') {
  85. if (i < length - 2) {
  86. char from[3];
  87. char *end;
  88. from[0] = input[i+1];
  89. from[1] = input[i+2];
  90. from[2] = 0;
  91. unsigned long n = strtoul(from, &end, 16);
  92. if (n < 256 && end == from+2) {
  93. output.append(1, n);
  94. i += 2;
  95. continue;
  96. }
  97. }
  98. }
  99. else if (decodePlus && input[i] == '+') {
  100. output.append(" ");
  101. continue;
  102. }
  103. output.append(1, input[i]);
  104. }
  105. lua_pushstring(l, output.c_str());
  106. return 1;
  107. }
  108. }
  109. static std::string monaKey;
  110. void BBS2chProxyConnection::setMonaKey(const std::string &key) {
  111. pthread_mutex_lock(mutex);
  112. monaKey = key;
  113. pthread_mutex_unlock(mutex);
  114. }
  115. std::string BBS2chProxyConnection::getMonaKey() {
  116. if (monaKey.empty())
  117. return "00000000-0000-0000-0000-000000000000";
  118. return monaKey;
  119. }
  120. #endif
  121. void *BBS2chProxyConnection::launch(void *param)
  122. {
  123. reinterpret_cast<BBS2chProxyConnection *>(param)->connect();
  124. return (void *)NULL;
  125. }
  126. void BBS2chProxyConnection::run(void)
  127. {
  128. pthread_t thread;
  129. pthread_attr_t thread_attr;
  130. pthread_attr_init(&thread_attr);
  131. pthread_attr_setdetachstate(&thread_attr , PTHREAD_CREATE_DETACHED);
  132. if(0 != pthread_create(&thread , &thread_attr , &BBS2chProxyConnection::launch , this))
  133. perror("pthread_create");
  134. pthread_attr_destroy(&thread_attr);
  135. }
  136. void *BBS2chProxyConnection::tunnel_c2s(void *param)
  137. {
  138. char buf[2048];
  139. fd_set fds;
  140. int sock_c = reinterpret_cast<BBS2chProxyConnection *>(param)->sock_c;
  141. int sock_s = reinterpret_cast<BBS2chProxyConnection *>(param)->sock_s;
  142. #ifdef _WIN32
  143. struct timeval timeout;
  144. timeout.tv_sec = 0;
  145. timeout.tv_usec = 10000;
  146. #endif
  147. while(1) {
  148. #ifdef _WIN32
  149. FD_ZERO(&fds);
  150. FD_SET(sock_c,&fds);
  151. if(select(sock_c + 1,&fds,NULL,NULL,&timeout) < 0) break;
  152. if(FD_ISSET(sock_c,&fds)) {
  153. #endif
  154. int ret = recv(sock_c, buf, 2048, 0);
  155. if(ret > 0) send(sock_s, buf, ret, 0);
  156. else if(ret <= 0) break;
  157. #ifdef _WIN32
  158. }
  159. #endif
  160. }
  161. //fprintf(stderr,"tunnel_c2s end\n");
  162. CLOSESOCKET(sock_s);
  163. return NULL;
  164. }
  165. void *BBS2chProxyConnection::tunnel_s2c(void *param)
  166. {
  167. char buf[2048];
  168. fd_set fds;
  169. int sock_c = reinterpret_cast<BBS2chProxyConnection *>(param)->sock_c;
  170. int sock_s = reinterpret_cast<BBS2chProxyConnection *>(param)->sock_s;
  171. while(1) {
  172. int ret = recv(sock_s, buf, 2048, 0);
  173. if(ret > 0) send(sock_c, buf, ret, 0);
  174. else if(ret <= 0) break;
  175. }
  176. //fprintf(stderr,"tunnel_s2c end\n");
  177. CLOSESOCKET(sock_c);
  178. return NULL;
  179. }
  180. int BBS2chProxyConnection::tunnel(const char *addr, int port)
  181. {
  182. struct sockaddr_in server;
  183. memset(&server, 0, sizeof(server));
  184. server.sin_family = AF_INET;
  185. server.sin_addr.s_addr = inet_addr(addr);
  186. server.sin_port = htons(port);
  187. if(server.sin_addr.s_addr == 0xffffffff) {
  188. struct hostent *host;
  189. host = gethostbyname(addr);
  190. if (host == NULL) {
  191. sendResponse(400, "Bad Request", fpw);
  192. return -1;
  193. }
  194. server.sin_addr.s_addr = *(unsigned int *)host->h_addr_list[0];
  195. }
  196. log_printf(1,"Tunneling connection to %s:%d\n",addr,port);
  197. sock_s = socket(AF_INET, SOCK_STREAM, 0);
  198. if(-1 == ::connect(sock_s, (struct sockaddr *)&server, sizeof(server))) {
  199. perror("connect");
  200. sendResponse(400, "Bad Request", fpw);
  201. return -1;
  202. }
  203. send(sock_c, "HTTP/1.1 200 Connection established\r\n\r\n", 39, 0);
  204. pthread_t thread_c2s, thread_s2c;
  205. if(0 != pthread_create(&thread_c2s, NULL, &BBS2chProxyConnection::tunnel_c2s, this))
  206. perror("pthread_create");
  207. if(0 != pthread_create(&thread_s2c, NULL, &BBS2chProxyConnection::tunnel_s2c, this))
  208. perror("pthread_create");
  209. pthread_join(thread_c2s, NULL);
  210. pthread_join(thread_s2c, NULL);
  211. log_printf(1,"Tunneling connection to %s:%d finished\n",addr,port);
  212. return 0;
  213. }
  214. void BBS2chProxyConnection::connect(void)
  215. {
  216. char method[32], url[1024], protocol[32];
  217. int i;
  218. char *buf, *ptr;
  219. bool html2dat = false;
  220. bool html2dat_kako = false;
  221. regmatch_t match[7];
  222. long statusCode = 0;
  223. #ifdef _WIN32
  224. int sock_osfhandle = _open_osfhandle(sock_c, O_RDONLY);
  225. fpr = fdopen(sock_osfhandle, "rb");
  226. fpw = fdopen(sock_osfhandle, "wb");
  227. #else
  228. fpr = fdopen(sock_c, "rb");
  229. fpw = fdopen(sock_c, "wb");
  230. #endif
  231. if(!fpr || !fpw) {
  232. log_printf(0, "Error: cannot open file descripter for client\n");
  233. goto end;
  234. }
  235. buf = (char *)malloc(16384);
  236. if(!buf) goto end;
  237. ptr = buf;
  238. if(!fgets(buf,1024,fpr)) {
  239. sendResponse(400, "Bad Request", fpw);
  240. statusCode = 400;
  241. goto end;
  242. }
  243. i=0;
  244. while(*ptr != ' ' && *ptr != 0 && i < 32) method[i++] = *ptr++;
  245. if(*ptr == 0 || i == 32) {
  246. sendResponse(400, "Bad Request", fpw);
  247. statusCode = 400;
  248. goto end;
  249. }
  250. method[i] = 0;
  251. ptr++;
  252. i=0;
  253. while(*ptr != ' ' && *ptr != 0 && i < 1024) url[i++] = *ptr++;
  254. if(*ptr == 0 || i == 1024) {
  255. sendResponse(400, "Bad Request", fpw);
  256. statusCode = 400;
  257. goto end;
  258. }
  259. url[i] = 0;
  260. ptr++;
  261. i=0;
  262. while(*ptr != '\r' && *ptr != '\n' && *ptr != 0 && i < 32) protocol[i++] = *ptr++;
  263. if(*ptr == 0 || i == 32) {
  264. sendResponse(400, "Bad Request", fpw);
  265. statusCode = 400;
  266. goto end;
  267. }
  268. protocol[i] = 0;
  269. log_printf(1, "Received %s %s %s\n",method,url,protocol);
  270. if(strcasecmp(method,"GET") && strcasecmp(method,"POST") && strcasecmp(method,"HEAD") && strcasecmp(method,"CONNECT")) {
  271. sendResponse(400, "Bad Request", fpw);
  272. statusCode = 400;
  273. goto end;
  274. }
  275. if(!url[0]) {
  276. sendResponse(400, "Bad Request", fpw);
  277. statusCode = 400;
  278. goto end;
  279. }
  280. if(strncasecmp(protocol,"HTTP",4)) {
  281. sendResponse(400, "Bad Request", fpw);
  282. statusCode = 400;
  283. goto end;
  284. }
  285. if(!strcasecmp(method,"CONNECT")) {
  286. if(!accept_https) {
  287. sendResponse(400, "Bad Request", fpw);
  288. statusCode = 400;
  289. goto end;
  290. }
  291. while(fgets(buf,16384,fpr)) {
  292. if(!strcmp("\r\n",buf)) break;
  293. }
  294. int port = 443;
  295. char *ptr = strchr(url, ':');
  296. if(ptr) {
  297. *ptr = 0;
  298. port = atoi(ptr+1);
  299. }
  300. tunnel(url, port);
  301. goto end;
  302. }
  303. if(force_5chnet && !strncasecmp(url,"http://",7)) {
  304. char *ptr = url+8;
  305. char *end = ptr;
  306. while(*end != '/' && *end != 0) end++;
  307. ptr = strstr(ptr,".2ch.net");
  308. if(ptr && ptr < end && memcmp(ptr-4,"menu.",5)) {
  309. memcpy(ptr+1,"5ch",3);
  310. force5ch = true;
  311. log_printf(1, "Detected *.2ch.net URL, changed target URL to %s\n",url);
  312. }
  313. }
  314. if(regexec(&regex, url, 6, match, 0) != REG_NOMATCH) {
  315. if(appKey || strncasecmp(url+match[1].rm_so,"headline.",9)) html2dat = true;
  316. }
  317. else if(regexec(&regex_kako, url, 7, match, 0) != REG_NOMATCH) {
  318. html2dat_kako = true;
  319. }
  320. else if(regexec(&regex_offlaw, url, 5, match, 0) != REG_NOMATCH) {
  321. char *thread = strstr(url,"key=");
  322. if(thread) {
  323. match[6].rm_so = thread+4-url;
  324. match[6].rm_eo = thread+4-url;
  325. char *ptr = thread+4;
  326. while(*ptr != '&' && *ptr != 0) {
  327. ptr++;
  328. match[6].rm_eo++;
  329. }
  330. if(match[6].rm_so != match[6].rm_eo) html2dat_kako = true;
  331. }
  332. }
  333. if(html2dat || html2dat_kako) {
  334. char tmp[1024];
  335. regmatch_t *tid = html2dat ? match+5 : match+6;
  336. strcpy(tmp,url);
  337. tmp[match[2].rm_eo] = 0;
  338. tmp[match[4].rm_eo] = 0;
  339. tmp[tid->rm_eo] = 0;
  340. if(!appKey || html2dat_kako) {
  341. log_printf(1, "Retrieving thread via read.cgi...\n");
  342. snprintf(url,1024,"%s/%s/%s",tmp+match[1].rm_so,tmp+match[4].rm_so,tmp+tid->rm_so);
  343. threadKey = std::string(url);
  344. snprintf(url,1024,"%s://%s/test/read.cgi/%s/%s/",force_5chnet_https?"https":"http",tmp+match[1].rm_so,tmp+match[4].rm_so,tmp+tid->rm_so);
  345. statusCode = datProxy(url, method);
  346. }
  347. else {
  348. log_printf(1, "Retrieving thread via API...\n");
  349. tmp[match[1].rm_eo] = 0;
  350. snprintf(url,1024,"https://%s/v1/%s/%s/%s",api_server,tmp+match[1].rm_so,tmp+match[4].rm_so,tmp+tid->rm_so);
  351. statusCode = datProxyAPI(url, method);
  352. }
  353. }
  354. else {
  355. char urlMod[1024];
  356. strcpy(urlMod,url);
  357. if(force_5chnet_https) {
  358. const char *host = "5ch.net";
  359. char *ptr = strstr(url,host);
  360. if(!ptr) {
  361. host = "2ch.net";
  362. ptr = strstr(url,host);
  363. }
  364. if(!ptr) {
  365. host = "bbspink.com";
  366. ptr = strstr(url,host);
  367. }
  368. if(ptr) {
  369. char *start = url+7;
  370. char *end = strchr(start,'/');
  371. int hostLength = strlen(host);
  372. if(!end) end = url+strlen(url);
  373. if(ptr >= start && ptr < end && (*(ptr-1) == '.' || *(ptr-1) == '/') && (*(ptr+hostLength) == '/' || *(ptr+hostLength) == ':' || *(ptr+hostLength) == 0)) {
  374. log_printf(1, "This is %s URL, connecting with HTTPS\n",host);
  375. if(*(ptr+hostLength) == ':') {
  376. *(ptr+hostLength) = 0;
  377. snprintf(urlMod,1024,"https://%s%s",start,end);
  378. *(ptr+hostLength) = ':';
  379. }
  380. else snprintf(urlMod,1024,"https://%s",start);
  381. log_printf(2, "URL is: %s\n",urlMod);
  382. }
  383. }
  384. }
  385. if(bbsmenu_url && !strcmp(url, bbsmenu_url)) {
  386. log_printf(1, "Running as a BBS menu proxy...\n");
  387. statusCode = bbsmenuProxy(urlMod, method);
  388. }
  389. else {
  390. bool isPostRequest = !strcasecmp(method, "POST");
  391. if(isPostRequest && (strstr(urlMod,".5ch.net") || strstr(urlMod,".bbspink.com")) && strstr(urlMod,"/test/bbs.cgi")) bbscgi = true;
  392. log_printf(1, "Not a thread request, passthrough...\n");
  393. struct curl_slist *headers = NULL;
  394. bool hasExpect = false;
  395. char *postdata = NULL;
  396. std::string hostStr;
  397. std::string boardStr;
  398. std::string threadStr;
  399. bool isNotFormURLEncoded = false;
  400. #ifdef USE_LUA
  401. std::map<std::string, std::string> headersForLua;
  402. #endif
  403. while(fgets(buf,16384,fpr)) {
  404. if(bbscgi) {
  405. char *ptr = strchr(buf, ':');
  406. if(ptr) {
  407. std::string header(buf, ptr-buf);
  408. if(bbscgi_headers.find(header) != bbscgi_headers.end()) {
  409. log_printf(1, "Ignoring header \"%s\" because alternative value exists\n", header.c_str());
  410. continue;
  411. }
  412. }
  413. }
  414. //fprintf(fpw,"%s",buf);
  415. //fprintf(stderr,"%s",buf);
  416. if(!strcmp("\r\n",buf)) break;
  417. if(!strncasecmp("Connection:",buf,11)) continue;
  418. else if(!strncasecmp("Host:",buf,5)) {
  419. char *ptr;
  420. ptr = strchr(buf, '\r');
  421. if(!ptr) ptr = strchr(buf, '\n');
  422. if(!ptr) continue;
  423. *ptr = 0;
  424. ptr = strchr(buf+5, ':');
  425. if(ptr) *ptr = 0;
  426. ptr = buf+5;
  427. while(*ptr == ' ') ptr++;
  428. if(force_5chnet) {
  429. char *ptr2 = strstr(ptr, ".2ch.net");
  430. if(ptr2) {
  431. *(ptr2+1) = '5';
  432. }
  433. }
  434. hostStr = std::string(ptr);
  435. continue;
  436. }
  437. //else if(!strncasecmp("Authorization:",buf,14)) continue;
  438. //else if(!strncasecmp("WWW-Authenticate:",buf,17)) continue;
  439. else if(user_agent && !strncasecmp("User-Agent:",buf,11)) continue;
  440. else if(bbscgi && !strncasecmp("Content-Length:",buf,15)) {
  441. char *ptr = buf + 15;
  442. while(*ptr == ' ') ptr++;
  443. content_length = atoi(ptr);
  444. continue;
  445. }
  446. else {
  447. char *ptr;
  448. ptr = strchr(buf, '\r');
  449. if(!ptr) ptr = strchr(buf, '\n');
  450. if(!ptr) continue;
  451. *ptr = 0;
  452. if(bbscgi && force_5chnet && !strncasecmp("Referer:",buf,8)) {
  453. char *ptr2 = strstr(buf+8, ".2ch.net");
  454. if(ptr2) {
  455. *(ptr2+1) = '5';
  456. }
  457. }
  458. headers = curl_slist_append(headers, buf);
  459. if(!strncasecmp("Content-Length:",buf,15)) {
  460. char *ptr = buf + 15;
  461. while(*ptr == ' ') ptr++;
  462. content_length = atoi(ptr);
  463. }
  464. else if(!strncasecmp("Expect:",buf,7)) {
  465. hasExpect = true;
  466. }
  467. else if(!strncasecmp("Content-Type:",buf,13)) {
  468. ptr = buf + 13;
  469. while(*ptr == ' ') ptr++;
  470. if(strncasecmp("application/x-www-form-urlencoded", ptr, 33)) {
  471. isNotFormURLEncoded = true;
  472. }
  473. }
  474. #ifdef USE_LUA
  475. if(bbscgi && lua_script) {
  476. ptr = strchr(buf, ':');
  477. if(ptr) {
  478. std::string header(buf, ptr-buf);
  479. ptr++;
  480. while(*ptr == ' ') ptr++;
  481. headersForLua.insert(std::make_pair(header, std::string(ptr)));
  482. }
  483. }
  484. #endif
  485. }
  486. }
  487. #ifdef USE_LUA
  488. if(bbscgi && lua_script && user_agent) {
  489. headersForLua.insert(std::make_pair("User-Agent", std::string(user_agent)));
  490. }
  491. #endif
  492. if(bbscgi && !isNotFormURLEncoded && content_length) {
  493. postdata = (char *)calloc(content_length+1, 1);
  494. content_length = fread(postdata,1,content_length,fpr);
  495. if(gikofix && content_length > 0) {
  496. char *ptr = postdata+content_length-1;
  497. while(*ptr == '\r' || *ptr == '\n') {
  498. *ptr-- = 0;
  499. }
  500. }
  501. }
  502. if(bbscgi && (!bbscgi_headers.empty() || lua_script)) {
  503. std::map<std::string, std::string> fields;
  504. if(postdata) {
  505. const char *ptr = postdata;
  506. while(1) {
  507. const char *tmp = ptr;
  508. while(*tmp != '=' && *tmp != 0) tmp++;
  509. if(*tmp == 0) break;
  510. std::string key(ptr, tmp-ptr);
  511. tmp++;
  512. ptr = tmp;
  513. while(*tmp != '&' && *tmp != 0) tmp++;
  514. std::string value(ptr, tmp-ptr);
  515. fields.insert(std::make_pair(key, value));
  516. if(*tmp == 0) break;
  517. ptr = tmp + 1;
  518. }
  519. }
  520. boardStr = fields["bbs"];
  521. threadStr = fields["key"];
  522. for(std::map<std::string, std::string>::iterator it = bbscgi_headers.begin(); it!=bbscgi_headers.end(); it++) {
  523. /* we cannot use a reference here, because the original string shouldn't be replaced */
  524. std::string value = it->second;
  525. if(!hostStr.empty()) {
  526. std::string::size_type pos = value.find("%HOST%");
  527. while(pos != std::string::npos) {
  528. value.replace(pos, 6, hostStr);
  529. pos = value.find("%HOST%", pos+hostStr.length());
  530. }
  531. }
  532. if(!boardStr.empty()) {
  533. std::string::size_type pos = value.find("%BOARD%");
  534. while(pos != std::string::npos) {
  535. value.replace(pos, 7, boardStr);
  536. pos = value.find("%BOARD%", pos+boardStr.length());
  537. }
  538. }
  539. if(!threadStr.empty()) {
  540. std::string::size_type pos = value.find("%THREAD%");
  541. while(pos != std::string::npos) {
  542. value.replace(pos, 8, threadStr);
  543. pos = value.find("%THREAD%", pos+threadStr.length());
  544. }
  545. }
  546. snprintf(buf,16384,"%s: %s",it->first.c_str(),value.c_str());
  547. headers = curl_slist_append(headers, buf);
  548. log_printf(1, "Appended custom header \"%s\"\n", buf);
  549. #ifdef USE_LUA
  550. if(lua_script) headersForLua[it->first] = value;
  551. #endif
  552. }
  553. }
  554. #ifdef USE_LUA
  555. if(bbscgi && lua_script) {
  556. lua_State* l = luaL_newstate();
  557. luaL_openlibs(l);
  558. if(luaL_loadfile(l, lua_script) != LUA_OK) {
  559. log_printf(0, "Lua: Failed to open script %s:\n %s\n", lua_script, lua_tostring(l, -1));
  560. goto lua_end;
  561. }
  562. lua_newtable(l);
  563. lua_pushstring(l, "hmacSHA256");
  564. lua_pushcfunction(l, lua_hmacSHA256);
  565. lua_settable(l, -3);
  566. lua_pushstring(l, "decodeURIComponent");
  567. lua_pushcfunction(l, lua_decodeURIComponent);
  568. lua_settable(l, -3);
  569. lua_pushstring(l, "monaKey");
  570. lua_pushstring(l, getMonaKey().c_str());
  571. lua_settable(l, -3);
  572. lua_setglobal(l, "proxy2ch");
  573. if(lua_pcall(l, 0, 0, 0) != LUA_OK) {
  574. log_printf(0, "Lua: Failed to run script %s:\n %s\n", lua_script, lua_tostring(l, -1));
  575. goto lua_end;
  576. }
  577. lua_getglobal(l, "willSendRequestToBbsCgi");
  578. if(!lua_isfunction(l, -1)) {
  579. log_printf(0, "Lua: willSendRequestToBbsCgi function does not exist in the script\n");
  580. goto lua_end;
  581. }
  582. lua_newtable(l);
  583. lua_pushstring(l, "headers");
  584. lua_newtable(l);
  585. for(std::map<std::string, std::string>::iterator it = headersForLua.begin(); it!=headersForLua.end(); it++) {
  586. lua_pushstring(l, it->first.c_str());
  587. lua_pushstring(l, it->second.c_str());
  588. lua_settable(l, -3);
  589. }
  590. lua_settable(l, -3);
  591. lua_pushstring(l, "body");
  592. lua_pushstring(l, postdata);
  593. lua_settable(l, -3);
  594. lua_pushstring(l, hostStr.c_str());
  595. lua_pushstring(l, boardStr.c_str());
  596. lua_pushstring(l, threadStr.c_str());
  597. if(lua_pcall(l, 4, 1, 0) != LUA_OK) {
  598. log_printf(0, "Lua: Failed to call willSendRequestToBbsCgi function:\n %s\n", lua_tostring(l, -1));
  599. goto lua_end;
  600. }
  601. if(!lua_istable(l, -1)) {
  602. log_printf(0, "Lua: A return type of willSendRequestToBbsCgi function should be a table\n");
  603. goto lua_end;
  604. }
  605. lua_pushstring(l, "body");
  606. lua_rawget(l, -2);
  607. if(lua_isstring(l, -1)) {
  608. size_t length;
  609. const char *newBody = lua_tolstring(l, -1, &length);
  610. if(length > content_length) {
  611. postdata = (char *)realloc(postdata, length+1);
  612. }
  613. strcpy(postdata, newBody);
  614. log_printf(1, "Lua: Set request body \"%s\"\n", newBody);
  615. }
  616. lua_pop(l, 1);
  617. lua_pushstring(l, "headers");
  618. lua_rawget(l, -2);
  619. if(lua_istable(l, -1)) {
  620. curl_slist_free_all(headers);
  621. headers = NULL;
  622. lua_pushnil(l);
  623. while(lua_next(l, -2)) {
  624. if(lua_isstring(l, -1) && lua_isstring(l, -2)) {
  625. std::string header = lua_tostring(l, -2);
  626. header += ": ";
  627. header += lua_tostring(l, -1);
  628. headers = curl_slist_append(headers, header.c_str());
  629. log_printf(1, "Lua: Set request header \"%s\"\n", header.c_str());
  630. }
  631. lua_pop(l, 1);
  632. }
  633. }
  634. lua_end:
  635. lua_close(l);
  636. }
  637. #endif
  638. if(!hasExpect) headers = curl_slist_append(headers, "Expect:");
  639. CURL *curl = curl_easy_init();
  640. if(curl) {
  641. CURLcode res;
  642. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  643. curl_easy_setopt(curl, CURLOPT_URL, urlMod);
  644. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  645. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  646. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_proxy);
  647. curl_easy_setopt(curl, CURLOPT_HEADERDATA, this);
  648. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_proxy);
  649. curl_easy_setopt(curl, CURLOPT_WRITEDATA, this);
  650. if(postdata) {
  651. curl_easy_setopt(curl, CURLOPT_POSTFIELDS, postdata);
  652. } else {
  653. if(content_length && isPostRequest) {
  654. /* set Content-Length explicitly via API to work properly with curl >= 7.66.0 */
  655. curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, content_length);
  656. }
  657. curl_easy_setopt(curl, CURLOPT_READFUNCTION, read_callback_proxy);
  658. curl_easy_setopt(curl, CURLOPT_READDATA, this);
  659. }
  660. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  661. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  662. //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
  663. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  664. if(!strncasecmp(protocol,"HTTP/1.0",8) && !strncasecmp(urlMod, "http://", 7)) {
  665. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0);
  666. }
  667. else {
  668. /* force use HTTP 1.1 because CURL_HTTP_VERSION_2TLS is used on curl (w/ nghttp2) >= 7.62.0 */
  669. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  670. }
  671. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
  672. if(user_agent) {
  673. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  674. }
  675. if(isPostRequest) {
  676. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  677. }
  678. else if(!strcasecmp(method, "HEAD")) {
  679. curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
  680. }
  681. if(proxy_server) {
  682. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  683. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  684. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  685. }
  686. res = curl_easy_perform(curl);
  687. if(res != CURLE_OK) {
  688. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),urlMod);
  689. if(!status) sendResponse(503, "Service Unavailable", fpw);
  690. statusCode = 503;
  691. }
  692. else {
  693. if(chunked) {
  694. fprintf(fpw,"0\r\n\r\n");
  695. }
  696. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  697. }
  698. curl_easy_cleanup(curl);
  699. fflush(fpw);
  700. }
  701. curl_slist_free_all(headers);
  702. if(postdata) free(postdata);
  703. }
  704. }
  705. end:
  706. if(statusCode) log_printf(1, "Returned status code %d to client\n",statusCode);
  707. if(buf) free(buf);
  708. if(fpr) fclose(fpr);
  709. if(fpw) fclose(fpw);
  710. close(sock_c);
  711. delete this;
  712. }
  713. int BBS2chProxyConnection::datProxy(const char *url, const char *method)
  714. {
  715. DataStorage *html = NULL;
  716. long statusCode = 0;
  717. long rangeStart = 0, rangeEnd = 0;
  718. time_t lastModified = 0;
  719. time_t ifModifiedSince = 0;
  720. char *buf = (char *)malloc(16384);
  721. char userAgentFromHeader[1024] = "";
  722. if(!buf) goto last;
  723. while(fgets(buf,16384,fpr)) {
  724. //fprintf(stderr,"%s",buf);
  725. if(!strcmp("\r\n",buf)) break;
  726. if(!strncasecmp("Range:",buf,6)) {
  727. if(strstr(buf+7,"bytes=") && !strchr(buf+7, ',')) {
  728. char *ptr = buf+13;
  729. if(*ptr == '-') {
  730. rangeStart = atoi(ptr);
  731. }
  732. else {
  733. rangeStart = strtol(ptr, &ptr, 10);
  734. if(*ptr == '-') ptr++;
  735. if(*ptr && *ptr != '\r') {
  736. rangeEnd = strtol(ptr, NULL, 10);
  737. if(rangeEnd && rangeStart > rangeEnd) {
  738. sendResponse(416, "Requested range not satisfiable", fpw);
  739. statusCode = 416;
  740. goto last;
  741. }
  742. }
  743. }
  744. //fprintf(stderr, "range=%ld-%ld\n",rangeStart,rangeEnd);
  745. }
  746. else {
  747. sendResponse(416, "Requested range not satisfiable", fpw);
  748. statusCode = 416;
  749. goto last;
  750. }
  751. }
  752. else if(!strncasecmp("If-Modified-Since:",buf,18)) {
  753. struct tm time_ = {};
  754. strptime(buf+19,httpTimestampFmt,&time_);
  755. ifModifiedSince = mktime(&time_);
  756. }
  757. else if(!strncasecmp("User-Agent:",buf,11)) {
  758. int i=0;
  759. char *ptr = buf+12;
  760. while(*ptr != '\r' && *ptr != '\n' && i < 1023) userAgentFromHeader[i++] = *ptr++;
  761. userAgentFromHeader[i] = 0;
  762. }
  763. }
  764. if(rangeStart > 0) {
  765. PBBS2chProxyThreadInfo info;
  766. pthread_mutex_lock(mutex);
  767. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  768. if(it != threadCache->end()) {
  769. info = it->second;
  770. }
  771. pthread_mutex_unlock(mutex);
  772. log_printf(5,"range request from %ld bytes\n",rangeStart);
  773. if(info) {
  774. int from = info->lastResNum;
  775. int alreadyRead = info->cachedSize;
  776. int lastResLength = info->cachedData->length;
  777. log_printf(5,"hit %s: cached %d bytes, last res size %d\n",threadKey.c_str(),alreadyRead,lastResLength);
  778. if(rangeStart <= alreadyRead && rangeStart >= alreadyRead - lastResLength) {
  779. CURL *curl = curl_easy_init();
  780. if(curl) {
  781. CURLcode res;
  782. DataStorage *dat = new DataStorage();
  783. log_printf(5,"partial access from res num %d\n",from);
  784. snprintf(buf,16384,"%s%d-n",url,from);
  785. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  786. curl_easy_setopt(curl, CURLOPT_URL, buf);
  787. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  788. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  789. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  790. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  791. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  792. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  793. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  794. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  795. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  796. if(proxy_server) {
  797. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  798. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  799. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  800. }
  801. if(user_agent) {
  802. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  803. }
  804. else if(userAgentFromHeader[0]) {
  805. curl_easy_setopt(curl, CURLOPT_USERAGENT, userAgentFromHeader);
  806. }
  807. res = curl_easy_perform(curl);
  808. if(res == CURLE_OK) {
  809. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  810. curl_easy_cleanup(curl);
  811. if(statusCode == 200 && dat->length) {
  812. DataStorage *updated = html2dat(dat, from, &lastModified, true);
  813. if(ifModifiedSince && lastModified && updated && updated->length == lastResLength) {
  814. struct tm time_ = {};
  815. gmtime_r(&lastModified,&time_);
  816. time_t tmp = mktime(&time_);
  817. if(ifModifiedSince >= tmp) {
  818. sendResponse(304, "Not Modified", fpw);
  819. log_printf(5,"not modified!\n");
  820. delete updated;
  821. delete dat;
  822. statusCode = 304;
  823. goto last;
  824. }
  825. }
  826. if(updated && updated->length && updated->length >= lastResLength) {
  827. html = new DataStorage(alreadyRead - lastResLength);
  828. html->appendBytes(updated->bytes, updated->length);
  829. if(!rangeEnd) rangeEnd = html->length - 1;
  830. if(rangeStart > rangeEnd) {
  831. sendResponse(416, "Requested range not satisfiable", fpw);
  832. delete updated;
  833. delete dat;
  834. statusCode = 416;
  835. goto last;
  836. }
  837. statusCode = 206;
  838. log_printf(5,"cache hit; reconstructed data length:%ld\n",(long)html->length);
  839. }
  840. else {
  841. log_printf(5,"cache misshit?\n");
  842. sendResponse(416, "Requested range not satisfiable", fpw);
  843. delete updated;
  844. delete dat;
  845. statusCode = 416;
  846. goto last;
  847. }
  848. delete updated;
  849. }
  850. }
  851. else {
  852. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),buf);
  853. curl_easy_cleanup(curl);
  854. }
  855. delete dat;
  856. if(html) goto resp;
  857. }
  858. }
  859. else {
  860. log_printf(5,"invalid cache contents\n");
  861. pthread_mutex_lock(mutex);
  862. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  863. if(it != threadCache->end()) {
  864. threadCache->erase(it);
  865. }
  866. pthread_mutex_unlock(mutex);
  867. }
  868. }
  869. }
  870. {
  871. CURL *curl = curl_easy_init();
  872. if(curl) {
  873. CURLcode res;
  874. DataStorage *dat = new DataStorage();
  875. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  876. curl_easy_setopt(curl, CURLOPT_URL, url);
  877. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  878. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  879. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  880. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  881. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  882. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  883. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  884. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  885. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  886. if(proxy_server) {
  887. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  888. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  889. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  890. }
  891. if(user_agent) {
  892. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  893. }
  894. else if(userAgentFromHeader[0]) {
  895. curl_easy_setopt(curl, CURLOPT_USERAGENT, userAgentFromHeader);
  896. }
  897. res = curl_easy_perform(curl);
  898. if(res != CURLE_OK) {
  899. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),url);
  900. sendResponse(503, "Service Unavailable", fpw);
  901. curl_easy_cleanup(curl);
  902. delete dat;
  903. statusCode = 503;
  904. goto last;
  905. }
  906. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  907. curl_easy_cleanup(curl);
  908. if(statusCode == 200) {
  909. html = html2dat(dat, 1, &lastModified, false);
  910. }
  911. delete dat;
  912. }
  913. if(!html || !html->length) {
  914. sendResponse(503, "Service Unavailable", fpw);
  915. statusCode = 503;
  916. goto last;
  917. }
  918. if((rangeStart || rangeEnd) && html && html->length) {
  919. if(!rangeEnd) rangeEnd = html->length - 1;
  920. if(rangeStart < 0) rangeStart = html->length + rangeStart;
  921. if(rangeStart < html->length && rangeEnd < html->length && rangeStart <= rangeEnd) {
  922. statusCode = 206;
  923. }
  924. else {
  925. if(ifModifiedSince && lastModified && rangeStart == html->length) {
  926. struct tm time_ = {};
  927. gmtime_r(&lastModified,&time_);
  928. time_t tmp = mktime(&time_);
  929. if(ifModifiedSince >= tmp) {
  930. sendResponse(304, "Not Modified", fpw);
  931. log_printf(5,"not modified!\n");
  932. statusCode = 304;
  933. goto last;
  934. }
  935. }
  936. sendResponse(416, "Requested range not satisfiable", fpw);
  937. statusCode = 416;
  938. goto last;
  939. }
  940. }
  941. }
  942. resp:
  943. if(statusCode == 206) sendBasicHeaders(statusCode,"Partial Content",fpw);
  944. else sendBasicHeaders(statusCode,"OK",fpw);
  945. if(0 > fprintf(fpw,"Content-Type: text/plain\r\n")) goto last;
  946. if(0 > fprintf(fpw,"Accept-Ranges: bytes\r\n")) goto last;
  947. if(statusCode == 206) {
  948. if(0 > fprintf(fpw,"Content-Range: bytes %ld-%ld/%ld\r\n",rangeStart,rangeEnd,(long)html->length)) goto last;
  949. //fprintf(stderr,"Content-Length: %ld\r\n",rangeEnd - rangeStart + 1);
  950. //fprintf(stderr,"Content-Range: bytes %ld-%ld/%ld\r\n",rangeStart,rangeEnd,(long)html->length);
  951. DataStorage *newHtml = new DataStorage();
  952. newHtml->appendBytes(html->bytes+rangeStart, rangeEnd - rangeStart + 1);
  953. delete html;
  954. html = newHtml;
  955. }
  956. if(0 > fprintf(fpw,"Content-Length: %ld\r\n",(long)html->length)) goto last;
  957. if(lastModified) {
  958. struct tm time_ = {};
  959. char date[256];
  960. gmtime_r(&lastModified,&time_);
  961. strftime(date,256,httpTimestampFmt,&time_);
  962. if(0 > fprintf(fpw,"Last-Modified: %s\r\n",date)) goto last;
  963. //fprintf(stderr,"Last-Modified: %s\r\n",date);
  964. }
  965. if(0 > fprintf(fpw,"\r\n")) goto last;
  966. if(html && statusCode >= 200 && statusCode < 300 && strcasecmp(method, "HEAD")) {
  967. if(html->length > fwrite(html->bytes,1,html->length,fpw)) goto last;
  968. }
  969. fflush(fpw);
  970. last:
  971. if(buf) free(buf);
  972. if(html) delete html;
  973. return statusCode;
  974. }
  975. DataStorage *BBS2chProxyConnection::html2dat_old(DataStorage *html, int startResNum, time_t *lastModified, bool useCache)
  976. {
  977. char *ptr = html->bytes;
  978. char *end = html->bytes + html->length - 1;
  979. DataStorage *txt = new DataStorage();
  980. int res = startResNum, i=0;
  981. char signature[32];
  982. char title[1024];
  983. int cachedSize = 0;
  984. bool bbspink = strstr(threadKey.c_str(),"bbspink.com") ? true : false;
  985. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<title>", 7);
  986. if(!ptr) {
  987. delete txt;
  988. return NULL;
  989. }
  990. ptr += 7;
  991. while(1) {
  992. if(*ptr == '<') {
  993. if(!strncasecmp(ptr,"</title>",8)) {
  994. ptr += 8;
  995. break;
  996. }
  997. else title[i++] = *ptr++;
  998. }
  999. else title[i++] = *ptr++;
  1000. }
  1001. title[i] = 0;
  1002. snprintf(signature,32,"<dt>%d ",res);
  1003. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  1004. if(!ptr) {
  1005. delete txt;
  1006. return NULL;
  1007. }
  1008. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  1009. if(!buffer) {
  1010. delete txt;
  1011. return NULL;
  1012. }
  1013. unsigned char *body = buffer;
  1014. char *mail = (char *)body + 65536;
  1015. char *name = mail + 1024;
  1016. char *date = name + 1024;
  1017. char *encrypted = date + 1024;
  1018. while(ptr < end) {
  1019. //fprintf(stderr,"%s\n",signature);
  1020. DataStorage *resData = new DataStorage();
  1021. i=0;
  1022. mail[0] = 0;
  1023. ptr = strstr(ptr,signature);
  1024. ptr += strlen(signature);
  1025. while(*ptr != '<') ptr++;
  1026. ptr++;
  1027. const char *endStr;
  1028. if(*ptr == 'a' || *ptr == 'A') {
  1029. replay:
  1030. // has mail
  1031. while(*ptr != '"') ptr++;
  1032. ptr++;
  1033. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  1034. ptr += 28;
  1035. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  1036. encrypted[i] = 0;
  1037. i = decryptMail((unsigned char *)mail,encrypted);
  1038. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  1039. ptr -= reconstruct_len;
  1040. char *start = ptr;
  1041. memcpy(ptr, "<a href=\"mailto:", 16);
  1042. ptr += 16;
  1043. memcpy(ptr, mail, i);
  1044. ptr = start;
  1045. i=0;
  1046. goto replay;
  1047. }
  1048. else {
  1049. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  1050. while(*ptr != '"') mail[i++] = *ptr++;
  1051. mail[i] = 0;
  1052. }
  1053. endStr = "</a>";
  1054. }
  1055. else if(*ptr == 'b') {
  1056. endStr = NULL;
  1057. }
  1058. else {
  1059. endStr = "</font>";
  1060. }
  1061. if(endStr) {
  1062. ptr = strstr(ptr,"<b>");
  1063. ptr += 3;
  1064. }
  1065. else {
  1066. ptr = strchr(ptr,'>');
  1067. ptr++;
  1068. }
  1069. i=0;
  1070. while(1) {
  1071. if(*ptr == '<') {
  1072. if(!strncasecmp(ptr,"</b>",4) && (!endStr || !strncasecmp(ptr+4,endStr,strlen(endStr)))) {
  1073. ptr += 4;
  1074. if(endStr) ptr += strlen(endStr);
  1075. break;
  1076. }
  1077. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  1078. int j=0;
  1079. ptr = strstr(ptr,"data-cfemail=\"");
  1080. ptr += 14;
  1081. while(*ptr != '"') encrypted[j++] = *ptr++;
  1082. encrypted[j] = 0;
  1083. j = decryptMail((unsigned char *)name+i,encrypted);
  1084. i += j;
  1085. ptr = strstr(ptr,"</script>");
  1086. ptr += 9;
  1087. }
  1088. else name[i++] = *ptr++;
  1089. }
  1090. else name[i++] = *ptr++;
  1091. }
  1092. resData->appendBytes(name, i);
  1093. resData->appendBytes("<>", 2);
  1094. if(mail[0]) resData->appendBytes(mail ,strlen(mail));
  1095. resData->appendBytes("<>", 2);
  1096. ptr += 2;
  1097. i=0;
  1098. while(1) {
  1099. if(*ptr == '<') {
  1100. if(!strncasecmp(ptr,"<dd>",4)) {
  1101. ptr += 4;
  1102. break;
  1103. }
  1104. else if(!strncmp(ptr,"<a href=\"javascript:be(",23)) {
  1105. memcpy(date+i,"BE:",3);
  1106. ptr += 23;
  1107. i += 3;
  1108. while(*ptr != ')') date[i++] = *ptr++;
  1109. date[i++] = '-';
  1110. ptr = strchr(ptr,'?');
  1111. ptr++;
  1112. char *tmp = strstr(ptr,"</a>");
  1113. memcpy(date+i,ptr,tmp-ptr);
  1114. i += tmp-ptr;
  1115. ptr = tmp + 4;
  1116. }
  1117. else date[i++] = *ptr++;
  1118. }
  1119. else date[i++] = *ptr++;
  1120. }
  1121. resData->appendBytes(date ,i);
  1122. resData->appendBytes("<>", 2);
  1123. i=0;
  1124. while(1) {
  1125. if(*ptr == '<') {
  1126. if(!strncasecmp(ptr,"<br><br>\n",9)) {
  1127. ptr += 9;
  1128. break;
  1129. }
  1130. else if(!strncasecmp(ptr,"<dt>",4) || !strncasecmp(ptr,"</dl>",5)) {
  1131. while(i>0 &&body[i-1] == '\n') i--;
  1132. break;
  1133. }
  1134. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  1135. int j=0;
  1136. ptr = strstr(ptr,"data-cfemail=\"");
  1137. ptr += 14;
  1138. while(*ptr != '"') encrypted[j++] = *ptr++;
  1139. encrypted[j] = 0;
  1140. j = decryptMail(body+i,encrypted);
  1141. i += j;
  1142. ptr = strstr(ptr,"</script>");
  1143. ptr += 9;
  1144. }
  1145. else if(!strncmp(ptr,"<a href=\"http",13)) {
  1146. ptr = strchr(ptr,'>');
  1147. ptr++;
  1148. char *link = ptr;
  1149. ptr = strstr(link,"</a>");
  1150. memcpy(body+i,link,ptr-link);
  1151. i += ptr-link;
  1152. ptr += 4;
  1153. }
  1154. else if(!strncmp(ptr,"<img src=\"",10)) {
  1155. ptr += 10;
  1156. char *img = ptr;
  1157. ptr = strstr(img,"\">");
  1158. memcpy(body+i,img,ptr-img);
  1159. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  1160. int length = ptr-img;
  1161. while(*img != '/') {
  1162. img++;
  1163. length--;
  1164. }
  1165. memcpy(body+i,"sssp:",5);
  1166. memcpy(body+i+5,img,length);
  1167. i += length + 5;
  1168. }
  1169. else i += ptr-img;
  1170. ptr += 2;
  1171. }
  1172. else if(!bbspink && !strncmp(ptr,"<br>",4)) {
  1173. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  1174. memcpy(body+i," <br>",5);
  1175. i += 5;
  1176. }
  1177. else {
  1178. memcpy(body+i,"<br>",4);
  1179. i += 4;
  1180. }
  1181. ptr += 4;
  1182. }
  1183. else body[i++] = *ptr++;
  1184. }
  1185. else if(!bbspink && *ptr == ' ') {
  1186. if(*(ptr+1) == ' ') ptr++;
  1187. else body[i++] = *ptr++;
  1188. }
  1189. else body[i++] = *ptr++;
  1190. }
  1191. resData->appendBytes(body ,i);
  1192. resData->appendBytes("<>", 2);
  1193. if(res == 1) resData->appendBytes(title ,strlen(title));
  1194. resData->appendBytes("\n" ,1);
  1195. if(useCache && res == startResNum) {
  1196. PBBS2chProxyThreadInfo info;
  1197. bool hit = false;
  1198. pthread_mutex_lock(mutex);
  1199. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  1200. if(it != threadCache->end()) {
  1201. info = it->second;
  1202. threadCache->erase(it);
  1203. }
  1204. pthread_mutex_unlock(mutex);
  1205. if(info) {
  1206. log_printf(5,"cache hit");
  1207. if(info->cachedData->length == resData->length) {
  1208. log_printf(5,"... size match");
  1209. if(!memcmp(info->cachedData->bytes,resData->bytes,resData->length)) {
  1210. log_printf(5,"... content match");
  1211. hit = true;
  1212. cachedSize = info->cachedSize - resData->length;
  1213. }
  1214. }
  1215. log_printf(5,"\n");
  1216. }
  1217. if(!hit) {
  1218. delete resData;
  1219. free(buffer);
  1220. return NULL;
  1221. }
  1222. }
  1223. txt->appendBytes(resData->bytes, resData->length);
  1224. res++;
  1225. while(*ptr == '\n' || *ptr == '\r') ptr++;
  1226. snprintf(signature,32,"<dt>%d ",res);
  1227. if(!memmem_priv(ptr, end-ptr+1, signature, strlen(signature))) {
  1228. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1229. info->lastResNum = res-1;
  1230. info->cachedSize = txt->length+cachedSize;
  1231. info->cachedData = resData;
  1232. pthread_mutex_lock(mutex);
  1233. threadCache->insert(std::make_pair(threadKey,info));
  1234. pthread_mutex_unlock(mutex);
  1235. log_printf(5,"cached thread %s (%ld bytes)\n",threadKey.c_str(),(long)resData->length);
  1236. if(lastModified) {
  1237. *lastModified = 0;
  1238. char formattedDate[256];
  1239. char *ptr;
  1240. ptr = date;
  1241. int year = strtol(ptr,&ptr,10);
  1242. if(*ptr != '/') break;
  1243. ptr++;
  1244. int month = strtol(ptr,&ptr,10);
  1245. if(*ptr != '/') break;
  1246. ptr++;
  1247. int day = strtol(ptr,&ptr,10);
  1248. if(!*ptr) break;
  1249. while(*ptr != ' ' && *ptr != 0) ptr++;
  1250. if(!*ptr) break;
  1251. ptr++;
  1252. int hour = strtol(ptr,&ptr,10);
  1253. if(*ptr != ':') break;
  1254. ptr++;
  1255. int minutes = strtol(ptr,&ptr,10);
  1256. if(*ptr != ':') break;
  1257. ptr++;
  1258. int seconds = strtol(ptr,&ptr,10);
  1259. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  1260. if(year < 100) year += 2000;
  1261. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  1262. //fprintf(stderr,"%s\n",formattedDate);
  1263. struct tm time = {};
  1264. strptime(formattedDate,threadTimestampFmt,&time);
  1265. *lastModified = mktime(&time);
  1266. //gmtime_r(lastModified,&time);
  1267. //strftime(formattedDate,256,httpTimestampFmt,&time);
  1268. //fprintf(stderr,"%s\n",formattedDate);
  1269. }
  1270. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  1271. break;
  1272. }
  1273. delete resData;
  1274. }
  1275. free(buffer);
  1276. return txt;
  1277. }
  1278. DataStorage *BBS2chProxyConnection::html2dat(DataStorage *html, int startResNum, time_t *lastModified, bool useCache)
  1279. {
  1280. char *ptr = html->bytes;
  1281. char *end = html->bytes + html->length - 1;
  1282. DataStorage *txt = new DataStorage();
  1283. int res = startResNum, i=0;
  1284. char signature[64];
  1285. char title[1024];
  1286. int cachedSize = 0;
  1287. char signatureTag[32];
  1288. char closeTag[32];
  1289. int closeTagLen;
  1290. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<h1 class=\"title\">", 18);
  1291. if(!ptr) {
  1292. delete txt;
  1293. return html2dat_old(html, startResNum, lastModified, useCache);
  1294. }
  1295. else {
  1296. char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, " class=\"post\"", 13);
  1297. if(ptr2) {
  1298. char *tmp = ptr2;
  1299. *ptr2 = 0;
  1300. while(*ptr2 != '<') ptr2--;
  1301. strcpy(signatureTag, ptr2);
  1302. *tmp = ' ';
  1303. }
  1304. else {
  1305. delete txt;
  1306. return NULL;
  1307. }
  1308. /*char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, "<dl class=\"post\"", 16);
  1309. if(ptr2) {
  1310. delete txt;
  1311. return html2dat_pink(html, startResNum, lastModified, useCache);
  1312. }*/
  1313. }
  1314. ptr += 18;
  1315. while(1) {
  1316. if(*ptr == '<') {
  1317. if(!strncasecmp(ptr,"</h1>",5)) {
  1318. ptr += 5;
  1319. break;
  1320. }
  1321. else title[i++] = *ptr++;
  1322. }
  1323. else if(*ptr == '\n') break;
  1324. else title[i++] = *ptr++;
  1325. }
  1326. title[i] = 0;
  1327. snprintf(signature,32,"%s class=\"post\" id=\"%d\"",signatureTag,res);
  1328. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  1329. if(!ptr) {
  1330. delete txt;
  1331. return NULL;
  1332. }
  1333. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  1334. if(!buffer) {
  1335. delete txt;
  1336. return NULL;
  1337. }
  1338. unsigned char *body = buffer;
  1339. char *mail = (char *)body + 65536;
  1340. char *name = mail + 1024;
  1341. char *date = name + 1024;
  1342. char *encrypted = date + 1024;
  1343. while(ptr < end) {
  1344. //fprintf(stderr,"%s\n",signature);
  1345. DataStorage *resData = new DataStorage();
  1346. i=0;
  1347. mail[0] = 0;
  1348. ptr = strstr(ptr," class=\"name\"><b>");
  1349. if(ptr) {
  1350. char *tmp = ptr;
  1351. *ptr = 0;
  1352. while(*ptr != '<') ptr--;
  1353. snprintf(closeTag,32,"</%s>",ptr+1);
  1354. closeTagLen = strlen(closeTag);
  1355. ptr = tmp + 17;
  1356. }
  1357. else {
  1358. delete resData;
  1359. break;
  1360. }
  1361. char endStr[64];
  1362. if(!strncmp(ptr,"<a href=\"mailto:",16)) {
  1363. replay:
  1364. // has mail
  1365. while(*ptr != '"') ptr++;
  1366. ptr++;
  1367. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  1368. ptr += 28;
  1369. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  1370. encrypted[i] = 0;
  1371. i = decryptMail((unsigned char *)mail,encrypted);
  1372. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  1373. ptr -= reconstruct_len;
  1374. char *start = ptr;
  1375. memcpy(ptr, "<a href=\"mailto:", 16);
  1376. ptr += 16;
  1377. memcpy(ptr, mail, i);
  1378. ptr = start;
  1379. i=0;
  1380. goto replay;
  1381. }
  1382. else {
  1383. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  1384. while(1) {
  1385. if(*ptr == '<' && !strncmp(ptr,"<a href=\"",9)) {
  1386. ptr = strchr(ptr,'>');
  1387. ptr++;
  1388. char *link = ptr;
  1389. ptr = strstr(link,"</a>");
  1390. memcpy(mail+i,link,ptr-link);
  1391. i += ptr-link;
  1392. ptr += 4;
  1393. }
  1394. else if(*ptr == '"') break;
  1395. else mail[i++] = *ptr++;
  1396. }
  1397. //while(*ptr != '"') mail[i++] = *ptr++;
  1398. mail[i] = 0;
  1399. }
  1400. snprintf(endStr,64,"</a></b>%s",closeTag);
  1401. while(*ptr != '>') ptr++;
  1402. ptr++;
  1403. }
  1404. /* we do not have to handle this special case because read.cgi on bbspink doesn't
  1405. emit font tags anymore and it conflicts with text decorations using "melon point" */
  1406. /*else if(!strncmp(ptr,"<font",5)) {
  1407. snprintf(endStr,64,"</font></b>%s",closeTag);
  1408. while(*ptr != '>') ptr++;
  1409. ptr++;
  1410. }*/
  1411. else {
  1412. snprintf(endStr,64,"</b>%s",closeTag);
  1413. }
  1414. i=0;
  1415. while(1) {
  1416. if(*ptr == '<') {
  1417. if(!strncmp(ptr,endStr,strlen(endStr))) {
  1418. ptr += strlen(endStr);
  1419. break;
  1420. }
  1421. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  1422. int j=0;
  1423. ptr = strstr(ptr,"data-cfemail=\"");
  1424. ptr += 14;
  1425. while(*ptr != '"') encrypted[j++] = *ptr++;
  1426. encrypted[j] = 0;
  1427. j = decryptMail((unsigned char *)name+i,encrypted);
  1428. i += j;
  1429. ptr = strstr(ptr,"</script>");
  1430. ptr += 9;
  1431. }
  1432. else if(!strncmp(ptr,"<a href=\"",9)) {
  1433. ptr = strchr(ptr,'>');
  1434. ptr++;
  1435. char *link = ptr;
  1436. ptr = strstr(link,"</a>");
  1437. memcpy(name+i,link,ptr-link);
  1438. i += ptr-link;
  1439. ptr += 4;
  1440. }
  1441. else name[i++] = *ptr++;
  1442. }
  1443. else name[i++] = *ptr++;
  1444. }
  1445. resData->appendBytes(name, i);
  1446. resData->appendBytes("<>", 2);
  1447. if(mail[0]) resData->appendBytes(mail ,strlen(mail));
  1448. resData->appendBytes("<>", 2);
  1449. ptr = strstr(ptr," class=\"date\">");
  1450. if(ptr) {
  1451. char *tmp = ptr;
  1452. *ptr = 0;
  1453. while(*ptr != '<') ptr--;
  1454. snprintf(closeTag,32,"</%s>",ptr+1);
  1455. closeTagLen = strlen(closeTag);
  1456. ptr = tmp + 14;
  1457. }
  1458. else {
  1459. delete resData;
  1460. break;
  1461. }
  1462. i=0;
  1463. while(1) {
  1464. if(*ptr == '<') {
  1465. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1466. ptr += closeTagLen;
  1467. break;
  1468. }
  1469. else date[i++] = *ptr++;
  1470. }
  1471. else date[i++] = *ptr++;
  1472. }
  1473. if(!strncmp(ptr,"<div class=\"uid",15) || !strncmp(ptr,"<span class=\"uid",16)) {
  1474. char *tmp = ptr+1;
  1475. while(*ptr != ' ') ptr++;
  1476. *ptr = 0;
  1477. snprintf(closeTag,32,"</%s>",tmp);
  1478. closeTagLen = strlen(closeTag);
  1479. ptr += 11;
  1480. while(*ptr != '>') ptr++;
  1481. ptr++;
  1482. date[i++] = ' ';
  1483. while(1) {
  1484. if(*ptr == '<') {
  1485. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1486. ptr += closeTagLen;
  1487. break;
  1488. }
  1489. else date[i++] = *ptr++;
  1490. }
  1491. else date[i++] = *ptr++;
  1492. }
  1493. }
  1494. if(!strncmp(ptr,"<div class=\"be",14) || !strncmp(ptr,"<span class=\"be",15)) {
  1495. ptr += 14;
  1496. while(*ptr != '>') ptr++;
  1497. ptr++;
  1498. if(!strncmp(ptr,"<a href=\"",9)) {
  1499. ptr += 9;
  1500. while(*ptr != '/' && *ptr != '"') ptr++;
  1501. if(*ptr == '/' && (!strncmp(ptr,"//be.2ch.net/user/",18) || !strncmp(ptr,"//be.5ch.net/user/",18))) {
  1502. memcpy(date+i," BE:",4);
  1503. i += 4;
  1504. ptr += 18;
  1505. while(*ptr != '"') date[i++] = *ptr++;
  1506. date[i++] = '-';
  1507. ptr = strchr(ptr,'?');
  1508. ptr++;
  1509. char *tmp = strstr(ptr,"</a>");
  1510. memcpy(date+i,ptr,tmp-ptr);
  1511. i += tmp-ptr;
  1512. ptr = tmp + 4;
  1513. }
  1514. }
  1515. }
  1516. resData->appendBytes(date ,i);
  1517. resData->appendBytes("<>", 2);
  1518. if(!strcmp(signatureTag,"<div")) {
  1519. ptr = strstr(ptr,"<div class=\"message\">");
  1520. if(!ptr) {
  1521. delete resData;
  1522. break;
  1523. }
  1524. else {
  1525. ptr += 21;
  1526. if(!strncasecmp(ptr,"<span class=\"escaped\">",22)) {
  1527. if(!strncasecmp(ptr+22,"<span class=\"AA\">",17)) {
  1528. strcpy(closeTag,"</span></span></div>");
  1529. closeTagLen = 20;
  1530. ptr += 22+17;
  1531. }
  1532. else {
  1533. strcpy(closeTag,"</span></div>");
  1534. closeTagLen = 13;
  1535. ptr += 22;
  1536. }
  1537. }
  1538. else {
  1539. strcpy(closeTag,"</div>");
  1540. closeTagLen = 6;
  1541. }
  1542. }
  1543. }
  1544. else {
  1545. ptr = strstr(ptr,"<dd class=\"thread_in\">");
  1546. if(!ptr) {
  1547. delete resData;
  1548. break;
  1549. }
  1550. strcpy(closeTag,"</dd>");
  1551. closeTagLen = 5;
  1552. ptr += 22;
  1553. }
  1554. i=0;
  1555. while(1) {
  1556. if(*ptr == '<') {
  1557. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1558. ptr += closeTagLen;
  1559. break;
  1560. }
  1561. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  1562. int j=0;
  1563. ptr = strstr(ptr,"data-cfemail=\"");
  1564. ptr += 14;
  1565. while(*ptr != '"') encrypted[j++] = *ptr++;
  1566. encrypted[j] = 0;
  1567. j = decryptMail(body+i,encrypted);
  1568. i += j;
  1569. ptr = strstr(ptr,"</script>");
  1570. ptr += 9;
  1571. }
  1572. else if(!strncmp(ptr,"<a ",3)) {
  1573. char *tmp = strchr(ptr,'>');
  1574. char *href = (char *)memmem_priv(ptr,tmp-ptr,"href=\"",6);
  1575. char *link = tmp+1;
  1576. if(href && !strncmp(link,"&gt;&gt;",8) && memmem_priv(href,link-href,"test/read.cgi/",14)) {
  1577. while(ptr < link) {
  1578. if(!strncmp(ptr," class=\"",8)) {
  1579. ptr += 8;
  1580. while(*ptr != '"' && *ptr != '>') ptr++;
  1581. if(*ptr == '"') ptr++;
  1582. }
  1583. else body[i++] = *ptr++;
  1584. }
  1585. }
  1586. else {
  1587. ptr = strstr(link,"</a>");
  1588. memcpy(body+i,link,ptr-link);
  1589. i += ptr-link;
  1590. ptr += 4;
  1591. }
  1592. }
  1593. else if(!strncmp(ptr,"<img src=\"",10)) {
  1594. ptr += 10;
  1595. char *img = ptr;
  1596. ptr = strstr(img,"\">");
  1597. memcpy(body+i,img,ptr-img);
  1598. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  1599. int length = ptr-img;
  1600. while(*img != '/') {
  1601. img++;
  1602. length--;
  1603. }
  1604. memcpy(body+i,"sssp:",5);
  1605. memcpy(body+i+5,img,length);
  1606. i += length + 5;
  1607. }
  1608. else i += ptr-img;
  1609. ptr += 2;
  1610. }
  1611. else if(!strncmp(ptr,"<br>",4)) {
  1612. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  1613. memcpy(body+i," <br>",5);
  1614. i += 5;
  1615. }
  1616. else {
  1617. memcpy(body+i,"<br>",4);
  1618. i += 4;
  1619. }
  1620. ptr += 4;
  1621. }
  1622. else body[i++] = *ptr++;
  1623. }
  1624. else body[i++] = *ptr++;
  1625. }
  1626. resData->appendBytes(body ,i);
  1627. resData->appendBytes("<>", 2);
  1628. if(res == 1) resData->appendBytes(title ,strlen(title));
  1629. resData->appendBytes("\n" ,1);
  1630. if(useCache && res == startResNum) {
  1631. PBBS2chProxyThreadInfo info;
  1632. bool hit = false;
  1633. pthread_mutex_lock(mutex);
  1634. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  1635. if(it != threadCache->end()) {
  1636. info = it->second;
  1637. threadCache->erase(it);
  1638. }
  1639. pthread_mutex_unlock(mutex);
  1640. if(info) {
  1641. log_printf(5,"cache hit");
  1642. if(info->cachedData->length == resData->length) {
  1643. log_printf(5,"... size match");
  1644. if(!memcmp(info->cachedData->bytes,resData->bytes,resData->length)) {
  1645. log_printf(5,"... content match");
  1646. hit = true;
  1647. cachedSize = info->cachedSize - resData->length;
  1648. }
  1649. }
  1650. log_printf(5,"\n");
  1651. }
  1652. if(!hit) {
  1653. delete resData;
  1654. free(buffer);
  1655. return NULL;
  1656. }
  1657. }
  1658. txt->appendBytes(resData->bytes, resData->length);
  1659. res++;
  1660. while(*ptr == '\n' || *ptr == '\r') ptr++;
  1661. snprintf(signature,64,"%s class=\"post\" id=\"",signatureTag);
  1662. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  1663. if(ptr) {
  1664. int next = atoi(ptr+strlen(signature));
  1665. if(next >= res) {
  1666. while(next > res) {
  1667. txt->appendBytes("broken<><>broken<> broken <>\n", 29);
  1668. res++;
  1669. }
  1670. }
  1671. else ptr = NULL;
  1672. }
  1673. if(!ptr) {
  1674. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1675. info->lastResNum = res-1;
  1676. info->cachedSize = txt->length+cachedSize;
  1677. info->cachedData = resData;
  1678. pthread_mutex_lock(mutex);
  1679. threadCache->insert(std::make_pair(threadKey,info));
  1680. pthread_mutex_unlock(mutex);
  1681. log_printf(5,"cached thread %s (%ld bytes)\n",threadKey.c_str(),(long)resData->length);
  1682. if(lastModified) {
  1683. *lastModified = 0;
  1684. char formattedDate[256];
  1685. char *ptr;
  1686. ptr = date;
  1687. int year = strtol(ptr,&ptr,10);
  1688. if(*ptr != '/') break;
  1689. ptr++;
  1690. int month = strtol(ptr,&ptr,10);
  1691. if(*ptr != '/') break;
  1692. ptr++;
  1693. int day = strtol(ptr,&ptr,10);
  1694. if(!*ptr) break;
  1695. while(*ptr != ' ' && *ptr != 0) ptr++;
  1696. if(!*ptr) break;
  1697. ptr++;
  1698. int hour = strtol(ptr,&ptr,10);
  1699. if(*ptr != ':') break;
  1700. ptr++;
  1701. int minutes = strtol(ptr,&ptr,10);
  1702. if(*ptr != ':') break;
  1703. ptr++;
  1704. int seconds = strtol(ptr,&ptr,10);
  1705. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  1706. if(year < 100) year += 2000;
  1707. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  1708. //fprintf(stderr,"%s\n",formattedDate);
  1709. struct tm time = {};
  1710. strptime(formattedDate,threadTimestampFmt,&time);
  1711. *lastModified = mktime(&time);
  1712. //gmtime_r(lastModified,&time);
  1713. //strftime(formattedDate,256,httpTimestampFmt,&time);
  1714. //fprintf(stderr,"%s\n",formattedDate);
  1715. }
  1716. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  1717. break;
  1718. }
  1719. delete resData;
  1720. }
  1721. free(buffer);
  1722. return txt;
  1723. }
  1724. int BBS2chProxyConnection::datProxyAPI(const char *url, const char *method)
  1725. {
  1726. long statusCode = 0;
  1727. std::string postBody = auth->requestBodyForURL(url);
  1728. if(postBody.empty()) {
  1729. sendResponse(401, "Unauthorized", fpw);
  1730. return 401;
  1731. }
  1732. CURL *curl = curl_easy_init();
  1733. char *buf = (char *)malloc(16384);
  1734. if(curl) {
  1735. CURLcode res;
  1736. struct curl_slist *headers = NULL;
  1737. DataStorage *receivedHeader = new DataStorage();
  1738. DataStorage *receivedBody = new DataStorage();
  1739. while(fgets(buf,16384,fpr)) {
  1740. //fprintf(stderr,"%s",buf);
  1741. if(!strcmp("\r\n",buf)) break;
  1742. else if(!strncasecmp("Range:",buf,6)
  1743. || !strncasecmp("If-Modified-Since:",buf,18)
  1744. || !strncasecmp("Accept-Encoding:",buf,16)) {
  1745. char *ptr;
  1746. ptr = strchr(buf, '\r');
  1747. if(!ptr) ptr = strchr(buf, '\n');
  1748. if(!ptr) continue;
  1749. *ptr = 0;
  1750. headers = curl_slist_append(headers, buf);
  1751. }
  1752. }
  1753. if(x_2ch_ua_dat) headers = curl_slist_append(headers, x_2ch_ua_dat);
  1754. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  1755. curl_easy_setopt(curl, CURLOPT_URL, url);
  1756. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
  1757. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  1758. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  1759. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  1760. curl_easy_setopt(curl, CURLOPT_WRITEDATA, receivedBody);
  1761. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_download);
  1762. curl_easy_setopt(curl, CURLOPT_HEADERDATA, receivedHeader);
  1763. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  1764. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  1765. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  1766. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  1767. if(proxy_server) {
  1768. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  1769. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  1770. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  1771. }
  1772. curl_easy_setopt(curl, CURLOPT_USERAGENT, api_ua_dat?api_ua_dat:"");
  1773. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  1774. #if LIBCURL_VERSION_NUM >= 0x071101
  1775. curl_easy_setopt(curl, CURLOPT_COPYPOSTFIELDS, postBody.c_str());
  1776. #else
  1777. curl_easy_setopt(curl, CURLOPT_POSTFIELDS, postBody.c_str());
  1778. #endif
  1779. //return;
  1780. res = curl_easy_perform(curl);
  1781. if(res == CURLE_OK) {
  1782. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  1783. receivedHeader->appendBytes("",1);
  1784. char *ptr = strstr(receivedHeader->bytes,"\r\n\r\n");
  1785. *(ptr+4) = 0;
  1786. int threadStatus = 0;
  1787. for(char *ptr2 = receivedHeader->bytes; ptr2<ptr;) {
  1788. if(!strncasecmp(ptr2, "Thread-Status:", strlen("Thread-Status:"))) {
  1789. ptr2 += strlen("Thread-Status:");
  1790. while(*ptr2 == ' ') ptr2++;
  1791. threadStatus = atoi(ptr2);
  1792. break;
  1793. }
  1794. ptr2 = strstr(ptr2, "\r\n") + 2;
  1795. }
  1796. if(threadStatus == 1) {
  1797. if(ptr+4-receivedHeader->bytes > fwrite(receivedHeader->bytes,1,ptr+4-receivedHeader->bytes,fpw)) goto last;
  1798. fflush(fpw);
  1799. if(receivedBody->length > fwrite(receivedBody->bytes,1,receivedBody->length,fpw)) goto last;
  1800. fflush(fpw);
  1801. goto last;
  1802. }
  1803. else if(threadStatus == 8) {
  1804. sendBasicHeaders(302, "Found", fpw);
  1805. if(0 > fprintf(fpw, "Location: http://www2.2ch.net/live.html\r\n")) goto last;
  1806. if(0 > fprintf(fpw, "\r\n")) goto last;
  1807. fflush(fpw);
  1808. statusCode = 302;
  1809. goto last;
  1810. }
  1811. else {
  1812. if (statusCode < 400) {
  1813. sendResponse(401, "Unauthorized", fpw);
  1814. statusCode = 401;
  1815. }
  1816. else {
  1817. sendResponse(503, "Service Unavailable", fpw);
  1818. statusCode = 503;
  1819. }
  1820. receivedBody->appendBytes("",1);
  1821. if(!strncasecmp(receivedBody->bytes,"ng (",4)) {
  1822. log_printf(0,"API gateway returned error: %s\n",receivedBody->bytes);
  1823. }
  1824. }
  1825. //fprintf(stderr,"%ld\n",statusCode);
  1826. }
  1827. else {
  1828. log_printf(0,"curl error: %s\n",curl_easy_strerror(res));
  1829. sendResponse(503, "Service Unavailable", fpw);
  1830. statusCode = 503;
  1831. }
  1832. last:
  1833. curl_easy_cleanup(curl);
  1834. curl_slist_free_all(headers);
  1835. delete receivedBody;
  1836. delete receivedHeader;
  1837. }
  1838. free(buf);
  1839. return statusCode;
  1840. }
  1841. int BBS2chProxyConnection::bbsmenuProxy(const char *url, const char *method)
  1842. {
  1843. long statusCode = 0;
  1844. DataStorage *dat = new DataStorage();
  1845. DataStorage *outHTML = new DataStorage();
  1846. char *buf = (char *)malloc(16384);
  1847. char userAgentFromHeader[1024] = "";
  1848. CURL *curl = NULL;
  1849. if(!buf) goto last;
  1850. while(fgets(buf,16384,fpr)) {
  1851. //fprintf(stderr,"%s",buf);
  1852. if(!strcmp("\r\n",buf)) break;
  1853. else if(!strncasecmp("User-Agent:",buf,11)) {
  1854. int i=0;
  1855. char *ptr = buf+12;
  1856. while(*ptr != '\r' && *ptr != '\n' && i < 1023) userAgentFromHeader[i++] = *ptr++;
  1857. userAgentFromHeader[i] = 0;
  1858. }
  1859. }
  1860. curl = curl_easy_init();
  1861. if(curl) {
  1862. CURLcode res;
  1863. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  1864. curl_easy_setopt(curl, CURLOPT_URL, url);
  1865. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  1866. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  1867. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  1868. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  1869. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  1870. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  1871. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  1872. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  1873. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  1874. if(proxy_server) {
  1875. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  1876. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  1877. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  1878. }
  1879. if(user_agent) {
  1880. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  1881. }
  1882. else if(userAgentFromHeader[0]) {
  1883. curl_easy_setopt(curl, CURLOPT_USERAGENT, userAgentFromHeader);
  1884. }
  1885. res = curl_easy_perform(curl);
  1886. if(res == CURLE_OK) {
  1887. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  1888. if(statusCode == 200 && dat->length) {
  1889. dat->appendBytes("",1);
  1890. dat->length--;
  1891. char *ptr = dat->bytes;
  1892. while(*ptr) {
  1893. if(!strncasecmp(ptr,"<a href=",8)) {
  1894. char *start = ptr;
  1895. char *end = strchr(ptr+8,'>');
  1896. ptr = strstr(ptr+8,"://");
  1897. if(ptr && ptr < end) {
  1898. char *protocol = ptr;
  1899. char *end2 = strchr(ptr+3,'/');
  1900. ptr = strstr(protocol+3,"5ch.net");
  1901. if(ptr && ptr < end2 && (*(ptr-1)=='.' || *(ptr-1)=='/')) {
  1902. memcpy(ptr,"2ch",3);
  1903. if(*(protocol-1) == 's') outHTML->appendBytes(start, protocol-start-1);
  1904. else outHTML->appendBytes(start, protocol-start);
  1905. outHTML->appendBytes(protocol, end-protocol);
  1906. ptr = end;
  1907. continue;
  1908. }
  1909. ptr = strstr(protocol+3,"bbspink.com");
  1910. if(ptr && ptr < end2 && (*(ptr-1)=='.' || *(ptr-1)=='/')) {
  1911. if(*(protocol-1) == 's') outHTML->appendBytes(start, protocol-start-1);
  1912. else outHTML->appendBytes(start, protocol-start);
  1913. outHTML->appendBytes(protocol, end-protocol);
  1914. ptr = end;
  1915. continue;
  1916. }
  1917. }
  1918. ptr = start;
  1919. }
  1920. outHTML->appendBytes(ptr++, 1);
  1921. }
  1922. }
  1923. }
  1924. else {
  1925. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),buf);
  1926. statusCode = 503;
  1927. }
  1928. }
  1929. if(statusCode == 200) {
  1930. sendBasicHeaders(statusCode,"OK",fpw);
  1931. if(0 > fprintf(fpw,"Content-Type: text/html\r\n")) goto last;
  1932. if(0 > fprintf(fpw,"Content-Length: %ld\r\n",(long)outHTML->length)) goto last;
  1933. if(0 > fprintf(fpw,"\r\n")) goto last;
  1934. if(strcasecmp(method, "HEAD")) {
  1935. if(outHTML->length > fwrite(outHTML->bytes,1,outHTML->length,fpw)) goto last;
  1936. }
  1937. fflush(fpw);
  1938. }
  1939. else {
  1940. sendResponse(503, "Service Unavailable", fpw);
  1941. statusCode = 503;
  1942. }
  1943. last:
  1944. if(curl) curl_easy_cleanup(curl);
  1945. if(buf) free(buf);
  1946. if(dat) delete dat;
  1947. if(outHTML) delete outHTML;
  1948. return statusCode;
  1949. }