BBS2chProxyConnection.cpp 59 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085
  1. #include <pthread.h>
  2. #include <time.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <unistd.h>
  6. #include <curl/curl.h>
  7. #ifdef USE_LUA
  8. #include <lua.hpp>
  9. #endif
  10. #ifdef _WIN32
  11. #include <fcntl.h>
  12. #include <winsock2.h>
  13. #include <ws2tcpip.h>
  14. #include <mswsock.h>
  15. #define CLOSESOCKET(x) closesocket(x)
  16. #else
  17. #include <sys/socket.h>
  18. #include <netinet/in.h>
  19. #include <netdb.h>
  20. #include <arpa/inet.h>
  21. #define CLOSESOCKET(x) close(x)
  22. #endif
  23. #include "BBS2chProxyConnection.h"
  24. #include "DataStorage.h"
  25. #include "hmac.h"
  26. #include "stringEncodingConverter.h"
  27. //#define DEBUG 1
  28. extern char *proxy_server;
  29. extern long proxy_port;
  30. extern long proxy_type;
  31. extern long timeout;
  32. extern char *user_agent;
  33. extern char *api_ua_dat;
  34. extern char *x_2ch_ua_dat;
  35. extern char *appKey;
  36. extern int allow_chunked;
  37. extern int curl_features;
  38. extern unsigned int curl_version_number;
  39. extern bool accept_https;
  40. extern int force_5chnet;
  41. extern int force_5chnet_https;
  42. extern int force_ipv4;
  43. extern char *bbsmenu_url;
  44. extern char *api_server;
  45. extern std::map<std::string, std::string> bbscgi_headers;
  46. extern int gikofix;
  47. extern CURLSH *curl_share;
  48. extern char *lua_script;
  49. extern void log_printf(int level, const char *format ...);
  50. #include "utils.h"
  51. #ifdef USE_LUA
  52. extern "C" {
  53. static int lua_hmacSHA256(lua_State *l)
  54. {
  55. static const char *table = "0123456789abcdef";
  56. size_t keyLength, dataLength;
  57. const char *key = luaL_checklstring(l, 1, &keyLength);
  58. const char *data = luaL_checklstring(l, 2, &dataLength);
  59. if (!key || !data) return 0;
  60. unsigned char digest[32];
  61. char digestStr[65];
  62. proxy2ch_HMAC_SHA256(key, keyLength, data, dataLength, digest);
  63. for (int i=0; i<32; i++) {
  64. unsigned char c = digest[i];
  65. unsigned char upper = (c >> 4) & 0xf;
  66. unsigned char lower = c & 0xf;
  67. digestStr[i*2] = table[upper];
  68. digestStr[i*2+1] = table[lower];
  69. }
  70. digestStr[64] = 0;
  71. lua_pushstring(l, digestStr);
  72. return 1;
  73. }
  74. static int lua_decodeURIComponent(lua_State *l)
  75. {
  76. size_t length;
  77. const char *input = luaL_checklstring(l, 1, &length);
  78. if (!input) return 0;
  79. bool decodePlus = true;
  80. if (!lua_isnoneornil(l, 2)) {
  81. decodePlus = (lua_toboolean(l, 2));
  82. }
  83. std::string output;
  84. for (int i=0;i<length;i++) {
  85. if (input[i] == '%') {
  86. if (i < length - 2) {
  87. char from[3];
  88. char *end;
  89. from[0] = input[i+1];
  90. from[1] = input[i+2];
  91. from[2] = 0;
  92. unsigned long n = strtoul(from, &end, 16);
  93. if (n < 256 && end == from+2) {
  94. output.append(1, n);
  95. i += 2;
  96. continue;
  97. }
  98. }
  99. }
  100. else if (decodePlus && input[i] == '+') {
  101. output.append(" ");
  102. continue;
  103. }
  104. output.append(1, input[i]);
  105. }
  106. lua_pushstring(l, output.c_str());
  107. return 1;
  108. }
  109. static int lua_encodeURIComponent(lua_State *l)
  110. {
  111. size_t length;
  112. const char *input = luaL_checklstring(l, 1, &length);
  113. if (!input) return 0;
  114. bool spaceAsPlus = true;
  115. if (!lua_isnoneornil(l, 2)) {
  116. spaceAsPlus = (lua_toboolean(l, 2));
  117. }
  118. std::string output;
  119. for (int i=0;i<length;i++) {
  120. unsigned char c = (unsigned char)input[i];
  121. if ((c >= '0' && c <= '9') ||
  122. (c >= 'A' && c <= 'Z') ||
  123. (c >= 'a' && c <= 'z') ||
  124. (c == '*') || (c == '-') || (c == '.') || (c == '_')) {
  125. output.append(1, c);
  126. }
  127. else if (c == ' ' && spaceAsPlus) {
  128. output.append("+");
  129. }
  130. else {
  131. char percentEncoded[4];
  132. snprintf(percentEncoded, 4, "%%%02X", c);
  133. output.append(percentEncoded);
  134. }
  135. }
  136. lua_pushstring(l, output.c_str());
  137. return 1;
  138. }
  139. static int lua_convertShiftJISToUTF8(lua_State *l)
  140. {
  141. size_t length;
  142. const char *input = luaL_checklstring(l, 1, &length);
  143. if (!input) return 0;
  144. if (length > 0) {
  145. char *output = convertShiftJISToUTF8(input, length);
  146. if (!output) lua_pushnil(l);
  147. else {
  148. lua_pushstring(l, output);
  149. free(output);
  150. }
  151. }
  152. else lua_pushstring(l, "");
  153. return 1;
  154. }
  155. }
  156. static std::string monaKey;
  157. void BBS2chProxyConnection::setMonaKey(const std::string &key) {
  158. pthread_mutex_lock(mutex);
  159. monaKey = key;
  160. pthread_mutex_unlock(mutex);
  161. }
  162. std::string BBS2chProxyConnection::getMonaKey() {
  163. if (monaKey.empty())
  164. return "00000000-0000-0000-0000-000000000000";
  165. return monaKey;
  166. }
  167. #endif
  168. void *BBS2chProxyConnection::launch(void *param)
  169. {
  170. reinterpret_cast<BBS2chProxyConnection *>(param)->connect();
  171. return (void *)NULL;
  172. }
  173. void BBS2chProxyConnection::run(void)
  174. {
  175. pthread_t thread;
  176. pthread_attr_t thread_attr;
  177. pthread_attr_init(&thread_attr);
  178. pthread_attr_setdetachstate(&thread_attr , PTHREAD_CREATE_DETACHED);
  179. if(0 != pthread_create(&thread , &thread_attr , &BBS2chProxyConnection::launch , this))
  180. perror("pthread_create");
  181. pthread_attr_destroy(&thread_attr);
  182. }
  183. void *BBS2chProxyConnection::tunnel_c2s(void *param)
  184. {
  185. char buf[2048];
  186. fd_set fds;
  187. int sock_c = reinterpret_cast<BBS2chProxyConnection *>(param)->sock_c;
  188. int sock_s = reinterpret_cast<BBS2chProxyConnection *>(param)->sock_s;
  189. #ifdef _WIN32
  190. struct timeval timeout;
  191. timeout.tv_sec = 0;
  192. timeout.tv_usec = 10000;
  193. #endif
  194. while(1) {
  195. #ifdef _WIN32
  196. FD_ZERO(&fds);
  197. FD_SET(sock_c,&fds);
  198. if(select(sock_c + 1,&fds,NULL,NULL,&timeout) < 0) break;
  199. if(FD_ISSET(sock_c,&fds)) {
  200. #endif
  201. int ret = recv(sock_c, buf, 2048, 0);
  202. if(ret > 0) send(sock_s, buf, ret, 0);
  203. else if(ret <= 0) break;
  204. #ifdef _WIN32
  205. }
  206. #endif
  207. }
  208. //fprintf(stderr,"tunnel_c2s end\n");
  209. CLOSESOCKET(sock_s);
  210. return NULL;
  211. }
  212. void *BBS2chProxyConnection::tunnel_s2c(void *param)
  213. {
  214. char buf[2048];
  215. fd_set fds;
  216. int sock_c = reinterpret_cast<BBS2chProxyConnection *>(param)->sock_c;
  217. int sock_s = reinterpret_cast<BBS2chProxyConnection *>(param)->sock_s;
  218. while(1) {
  219. int ret = recv(sock_s, buf, 2048, 0);
  220. if(ret > 0) send(sock_c, buf, ret, 0);
  221. else if(ret <= 0) break;
  222. }
  223. //fprintf(stderr,"tunnel_s2c end\n");
  224. CLOSESOCKET(sock_c);
  225. return NULL;
  226. }
  227. int BBS2chProxyConnection::tunnel(const char *addr, int port)
  228. {
  229. struct sockaddr_in server;
  230. memset(&server, 0, sizeof(server));
  231. server.sin_family = AF_INET;
  232. server.sin_addr.s_addr = inet_addr(addr);
  233. server.sin_port = htons(port);
  234. if(server.sin_addr.s_addr == 0xffffffff) {
  235. struct hostent *host;
  236. host = gethostbyname(addr);
  237. if (host == NULL) {
  238. sendResponse(400, "Bad Request", fpw);
  239. return -1;
  240. }
  241. server.sin_addr.s_addr = *(unsigned int *)host->h_addr_list[0];
  242. }
  243. log_printf(1,"Tunneling connection to %s:%d\n",addr,port);
  244. sock_s = socket(AF_INET, SOCK_STREAM, 0);
  245. if(-1 == ::connect(sock_s, (struct sockaddr *)&server, sizeof(server))) {
  246. perror("connect");
  247. sendResponse(400, "Bad Request", fpw);
  248. return -1;
  249. }
  250. send(sock_c, "HTTP/1.1 200 Connection established\r\n\r\n", 39, 0);
  251. pthread_t thread_c2s, thread_s2c;
  252. if(0 != pthread_create(&thread_c2s, NULL, &BBS2chProxyConnection::tunnel_c2s, this))
  253. perror("pthread_create");
  254. if(0 != pthread_create(&thread_s2c, NULL, &BBS2chProxyConnection::tunnel_s2c, this))
  255. perror("pthread_create");
  256. pthread_join(thread_c2s, NULL);
  257. pthread_join(thread_s2c, NULL);
  258. log_printf(1,"Tunneling connection to %s:%d finished\n",addr,port);
  259. return 0;
  260. }
  261. void BBS2chProxyConnection::connect(void)
  262. {
  263. char method[32], url[1024], protocol[32];
  264. int i;
  265. char *buf, *ptr;
  266. bool html2dat = false;
  267. bool html2dat_kako = false;
  268. regmatch_t match[7];
  269. long statusCode = 0;
  270. #ifdef _WIN32
  271. int sock_osfhandle = _open_osfhandle(sock_c, O_RDONLY);
  272. fpr = fdopen(sock_osfhandle, "rb");
  273. fpw = fdopen(sock_osfhandle, "wb");
  274. #else
  275. fpr = fdopen(sock_c, "rb");
  276. fpw = fdopen(sock_c, "wb");
  277. #endif
  278. if(!fpr || !fpw) {
  279. log_printf(0, "Error: cannot open file descripter for client\n");
  280. goto end;
  281. }
  282. buf = (char *)malloc(16384);
  283. if(!buf) goto end;
  284. ptr = buf;
  285. if(!fgets(buf,1024,fpr)) {
  286. sendResponse(400, "Bad Request", fpw);
  287. statusCode = 400;
  288. goto end;
  289. }
  290. i=0;
  291. while(*ptr != ' ' && *ptr != 0 && i < 32) method[i++] = *ptr++;
  292. if(*ptr == 0 || i == 32) {
  293. sendResponse(400, "Bad Request", fpw);
  294. statusCode = 400;
  295. goto end;
  296. }
  297. method[i] = 0;
  298. ptr++;
  299. i=0;
  300. while(*ptr != ' ' && *ptr != 0 && i < 1024) url[i++] = *ptr++;
  301. if(*ptr == 0 || i == 1024) {
  302. sendResponse(400, "Bad Request", fpw);
  303. statusCode = 400;
  304. goto end;
  305. }
  306. url[i] = 0;
  307. ptr++;
  308. i=0;
  309. while(*ptr != '\r' && *ptr != '\n' && *ptr != 0 && i < 32) protocol[i++] = *ptr++;
  310. if(*ptr == 0 || i == 32) {
  311. sendResponse(400, "Bad Request", fpw);
  312. statusCode = 400;
  313. goto end;
  314. }
  315. protocol[i] = 0;
  316. log_printf(1, "Received %s %s %s\n",method,url,protocol);
  317. if(strcasecmp(method,"GET") && strcasecmp(method,"POST") && strcasecmp(method,"HEAD") && strcasecmp(method,"CONNECT")) {
  318. sendResponse(400, "Bad Request", fpw);
  319. statusCode = 400;
  320. goto end;
  321. }
  322. if(!url[0]) {
  323. sendResponse(400, "Bad Request", fpw);
  324. statusCode = 400;
  325. goto end;
  326. }
  327. if(strncasecmp(protocol,"HTTP",4)) {
  328. sendResponse(400, "Bad Request", fpw);
  329. statusCode = 400;
  330. goto end;
  331. }
  332. if(!strcasecmp(method,"CONNECT")) {
  333. if(!accept_https) {
  334. sendResponse(400, "Bad Request", fpw);
  335. statusCode = 400;
  336. goto end;
  337. }
  338. while(fgets(buf,16384,fpr)) {
  339. if(!strcmp("\r\n",buf)) break;
  340. }
  341. int port = 443;
  342. char *ptr = strchr(url, ':');
  343. if(ptr) {
  344. *ptr = 0;
  345. port = atoi(ptr+1);
  346. }
  347. tunnel(url, port);
  348. goto end;
  349. }
  350. if(force_5chnet && !strncasecmp(url,"http://",7)) {
  351. char *ptr = url+8;
  352. char *end = ptr;
  353. while(*end != '/' && *end != 0) end++;
  354. ptr = strstr(ptr,".2ch.net");
  355. if(ptr && ptr < end && memcmp(ptr-4,"menu.",5)) {
  356. memcpy(ptr+1,"5ch",3);
  357. force5ch = true;
  358. log_printf(1, "Detected *.2ch.net URL, changed target URL to %s\n",url);
  359. }
  360. }
  361. if(regexec(&regex, url, 6, match, 0) != REG_NOMATCH) {
  362. if(appKey || strncasecmp(url+match[1].rm_so,"headline.",9)) html2dat = true;
  363. }
  364. else if(regexec(&regex_kako, url, 7, match, 0) != REG_NOMATCH) {
  365. html2dat_kako = true;
  366. }
  367. else if(regexec(&regex_offlaw, url, 5, match, 0) != REG_NOMATCH) {
  368. char *thread = strstr(url,"key=");
  369. if(thread) {
  370. match[6].rm_so = thread+4-url;
  371. match[6].rm_eo = thread+4-url;
  372. char *ptr = thread+4;
  373. while(*ptr != '&' && *ptr != 0) {
  374. ptr++;
  375. match[6].rm_eo++;
  376. }
  377. if(match[6].rm_so != match[6].rm_eo) html2dat_kako = true;
  378. }
  379. }
  380. if(html2dat || html2dat_kako) {
  381. char tmp[1024];
  382. regmatch_t *tid = html2dat ? match+5 : match+6;
  383. strcpy(tmp,url);
  384. tmp[match[2].rm_eo] = 0;
  385. tmp[match[4].rm_eo] = 0;
  386. tmp[tid->rm_eo] = 0;
  387. if(!appKey || html2dat_kako) {
  388. log_printf(1, "Retrieving thread via read.cgi...\n");
  389. snprintf(url,1024,"%s/%s/%s",tmp+match[1].rm_so,tmp+match[4].rm_so,tmp+tid->rm_so);
  390. threadKey = std::string(url);
  391. snprintf(url,1024,"%s://%s/test/read.cgi/%s/%s/",force_5chnet_https?"https":"http",tmp+match[1].rm_so,tmp+match[4].rm_so,tmp+tid->rm_so);
  392. statusCode = datProxy(url, method);
  393. }
  394. else {
  395. log_printf(1, "Retrieving thread via API...\n");
  396. tmp[match[1].rm_eo] = 0;
  397. snprintf(url,1024,"https://%s/v1/%s/%s/%s",api_server,tmp+match[1].rm_so,tmp+match[4].rm_so,tmp+tid->rm_so);
  398. statusCode = datProxyAPI(url, method);
  399. }
  400. }
  401. else {
  402. char urlMod[1024];
  403. strcpy(urlMod,url);
  404. if(force_5chnet_https) {
  405. const char *host = "5ch.net";
  406. char *ptr = strstr(url,host);
  407. if(!ptr) {
  408. host = "2ch.net";
  409. ptr = strstr(url,host);
  410. }
  411. if(!ptr) {
  412. host = "bbspink.com";
  413. ptr = strstr(url,host);
  414. }
  415. if(ptr) {
  416. char *start = url+7;
  417. char *end = strchr(start,'/');
  418. int hostLength = strlen(host);
  419. if(!end) end = url+strlen(url);
  420. if(ptr >= start && ptr < end && (*(ptr-1) == '.' || *(ptr-1) == '/') && (*(ptr+hostLength) == '/' || *(ptr+hostLength) == ':' || *(ptr+hostLength) == 0)) {
  421. log_printf(1, "This is %s URL, connecting with HTTPS\n",host);
  422. if(*(ptr+hostLength) == ':') {
  423. *(ptr+hostLength) = 0;
  424. snprintf(urlMod,1024,"https://%s%s",start,end);
  425. *(ptr+hostLength) = ':';
  426. }
  427. else snprintf(urlMod,1024,"https://%s",start);
  428. log_printf(2, "URL is: %s\n",urlMod);
  429. }
  430. }
  431. }
  432. if(bbsmenu_url && !strcmp(url, bbsmenu_url)) {
  433. log_printf(1, "Running as a BBS menu proxy...\n");
  434. statusCode = bbsmenuProxy(urlMod, method);
  435. }
  436. else {
  437. bool isPostRequest = !strcasecmp(method, "POST");
  438. if(isPostRequest && (strstr(urlMod,".5ch.net") || strstr(urlMod,".bbspink.com")) && strstr(urlMod,"/test/bbs.cgi")) bbscgi = true;
  439. log_printf(1, "Not a thread request, passthrough...\n");
  440. struct curl_slist *headers = NULL;
  441. bool hasExpect = false;
  442. char *postdata = NULL;
  443. std::string hostStr;
  444. std::string boardStr;
  445. std::string threadStr;
  446. bool isNotFormURLEncoded = false;
  447. #ifdef USE_LUA
  448. std::map<std::string, std::string> headersForLua;
  449. #endif
  450. while(fgets(buf,16384,fpr)) {
  451. if(bbscgi) {
  452. char *ptr = strchr(buf, ':');
  453. if(ptr) {
  454. std::string header(buf, ptr-buf);
  455. if(bbscgi_headers.find(header) != bbscgi_headers.end()) {
  456. log_printf(1, "Ignoring header \"%s\" because alternative value exists\n", header.c_str());
  457. continue;
  458. }
  459. }
  460. }
  461. //fprintf(fpw,"%s",buf);
  462. //fprintf(stderr,"%s",buf);
  463. if(!strcmp("\r\n",buf)) break;
  464. if(!strncasecmp("Connection:",buf,11)) continue;
  465. else if(!strncasecmp("Host:",buf,5)) {
  466. char *ptr;
  467. ptr = strchr(buf, '\r');
  468. if(!ptr) ptr = strchr(buf, '\n');
  469. if(!ptr) continue;
  470. *ptr = 0;
  471. ptr = strchr(buf+5, ':');
  472. if(ptr) *ptr = 0;
  473. ptr = buf+5;
  474. while(*ptr == ' ') ptr++;
  475. if(force_5chnet) {
  476. char *ptr2 = strstr(ptr, ".2ch.net");
  477. if(ptr2) {
  478. *(ptr2+1) = '5';
  479. }
  480. }
  481. hostStr = std::string(ptr);
  482. continue;
  483. }
  484. //else if(!strncasecmp("Authorization:",buf,14)) continue;
  485. //else if(!strncasecmp("WWW-Authenticate:",buf,17)) continue;
  486. else if(user_agent && !strncasecmp("User-Agent:",buf,11)) continue;
  487. else if(bbscgi && !strncasecmp("Content-Length:",buf,15)) {
  488. char *ptr = buf + 15;
  489. while(*ptr == ' ') ptr++;
  490. content_length = atoi(ptr);
  491. continue;
  492. }
  493. else {
  494. char *ptr;
  495. ptr = strchr(buf, '\r');
  496. if(!ptr) ptr = strchr(buf, '\n');
  497. if(!ptr) continue;
  498. *ptr = 0;
  499. if(bbscgi && force_5chnet && !strncasecmp("Referer:",buf,8)) {
  500. char *ptr2 = strstr(buf+8, ".2ch.net");
  501. if(ptr2) {
  502. *(ptr2+1) = '5';
  503. }
  504. }
  505. headers = curl_slist_append(headers, buf);
  506. if(!strncasecmp("Content-Length:",buf,15)) {
  507. char *ptr = buf + 15;
  508. while(*ptr == ' ') ptr++;
  509. content_length = atoi(ptr);
  510. }
  511. else if(!strncasecmp("Expect:",buf,7)) {
  512. hasExpect = true;
  513. }
  514. else if(!strncasecmp("Content-Type:",buf,13)) {
  515. ptr = buf + 13;
  516. while(*ptr == ' ') ptr++;
  517. if(strncasecmp("application/x-www-form-urlencoded", ptr, 33)) {
  518. isNotFormURLEncoded = true;
  519. }
  520. }
  521. #ifdef USE_LUA
  522. if(bbscgi && lua_script) {
  523. ptr = strchr(buf, ':');
  524. if(ptr) {
  525. std::string header(buf, ptr-buf);
  526. ptr++;
  527. while(*ptr == ' ') ptr++;
  528. headersForLua.insert(std::make_pair(header, std::string(ptr)));
  529. }
  530. }
  531. #endif
  532. }
  533. }
  534. #ifdef USE_LUA
  535. if(bbscgi && lua_script && user_agent) {
  536. headersForLua.insert(std::make_pair("User-Agent", std::string(user_agent)));
  537. }
  538. #endif
  539. if(bbscgi && !isNotFormURLEncoded && content_length) {
  540. postdata = (char *)calloc(content_length+1, 1);
  541. content_length = fread(postdata,1,content_length,fpr);
  542. if(gikofix && content_length > 0) {
  543. char *ptr = postdata+content_length-1;
  544. while(*ptr == '\r' || *ptr == '\n') {
  545. *ptr-- = 0;
  546. }
  547. }
  548. }
  549. if(bbscgi && (!bbscgi_headers.empty() || lua_script)) {
  550. std::map<std::string, std::string> fields;
  551. if(postdata) {
  552. const char *ptr = postdata;
  553. while(1) {
  554. const char *tmp = ptr;
  555. while(*tmp != '=' && *tmp != 0) tmp++;
  556. if(*tmp == 0) break;
  557. std::string key(ptr, tmp-ptr);
  558. tmp++;
  559. ptr = tmp;
  560. while(*tmp != '&' && *tmp != 0) tmp++;
  561. std::string value(ptr, tmp-ptr);
  562. fields.insert(std::make_pair(key, value));
  563. if(*tmp == 0) break;
  564. ptr = tmp + 1;
  565. }
  566. }
  567. boardStr = fields["bbs"];
  568. threadStr = fields["key"];
  569. for(std::map<std::string, std::string>::iterator it = bbscgi_headers.begin(); it!=bbscgi_headers.end(); it++) {
  570. /* we cannot use a reference here, because the original string shouldn't be replaced */
  571. std::string value = it->second;
  572. if(!hostStr.empty()) {
  573. std::string::size_type pos = value.find("%HOST%");
  574. while(pos != std::string::npos) {
  575. value.replace(pos, 6, hostStr);
  576. pos = value.find("%HOST%", pos+hostStr.length());
  577. }
  578. }
  579. if(!boardStr.empty()) {
  580. std::string::size_type pos = value.find("%BOARD%");
  581. while(pos != std::string::npos) {
  582. value.replace(pos, 7, boardStr);
  583. pos = value.find("%BOARD%", pos+boardStr.length());
  584. }
  585. }
  586. if(!threadStr.empty()) {
  587. std::string::size_type pos = value.find("%THREAD%");
  588. while(pos != std::string::npos) {
  589. value.replace(pos, 8, threadStr);
  590. pos = value.find("%THREAD%", pos+threadStr.length());
  591. }
  592. }
  593. snprintf(buf,16384,"%s: %s",it->first.c_str(),value.c_str());
  594. headers = curl_slist_append(headers, buf);
  595. log_printf(1, "Appended custom header \"%s\"\n", buf);
  596. #ifdef USE_LUA
  597. if(lua_script) headersForLua[it->first] = value;
  598. #endif
  599. }
  600. }
  601. #ifdef USE_LUA
  602. if(bbscgi && lua_script) {
  603. lua_State* l = luaL_newstate();
  604. luaL_openlibs(l);
  605. if(luaL_loadfile(l, lua_script) != LUA_OK) {
  606. log_printf(0, "Lua: Failed to open script %s:\n %s\n", lua_script, lua_tostring(l, -1));
  607. goto lua_end;
  608. }
  609. lua_newtable(l);
  610. lua_pushstring(l, "hmacSHA256");
  611. lua_pushcfunction(l, lua_hmacSHA256);
  612. lua_settable(l, -3);
  613. lua_pushstring(l, "decodeURIComponent");
  614. lua_pushcfunction(l, lua_decodeURIComponent);
  615. lua_settable(l, -3);
  616. lua_pushstring(l, "encodeURIComponent");
  617. lua_pushcfunction(l, lua_encodeURIComponent);
  618. lua_settable(l, -3);
  619. lua_pushstring(l, "convertShiftJISToUTF8");
  620. lua_pushcfunction(l, lua_convertShiftJISToUTF8);
  621. lua_settable(l, -3);
  622. lua_pushstring(l, "monaKey");
  623. lua_pushstring(l, getMonaKey().c_str());
  624. lua_settable(l, -3);
  625. lua_setglobal(l, "proxy2ch");
  626. if(lua_pcall(l, 0, 0, 0) != LUA_OK) {
  627. log_printf(0, "Lua: Failed to run script %s:\n %s\n", lua_script, lua_tostring(l, -1));
  628. goto lua_end;
  629. }
  630. lua_getglobal(l, "willSendRequestToBbsCgi");
  631. if(!lua_isfunction(l, -1)) {
  632. log_printf(0, "Lua: willSendRequestToBbsCgi function does not exist in the script\n");
  633. goto lua_end;
  634. }
  635. lua_newtable(l);
  636. lua_pushstring(l, "headers");
  637. lua_newtable(l);
  638. for(std::map<std::string, std::string>::iterator it = headersForLua.begin(); it!=headersForLua.end(); it++) {
  639. lua_pushstring(l, it->first.c_str());
  640. lua_pushstring(l, it->second.c_str());
  641. lua_settable(l, -3);
  642. }
  643. lua_settable(l, -3);
  644. lua_pushstring(l, "body");
  645. lua_pushstring(l, postdata);
  646. lua_settable(l, -3);
  647. lua_pushstring(l, hostStr.c_str());
  648. lua_pushstring(l, boardStr.c_str());
  649. lua_pushstring(l, threadStr.c_str());
  650. if(lua_pcall(l, 4, 1, 0) != LUA_OK) {
  651. log_printf(0, "Lua: Failed to call willSendRequestToBbsCgi function:\n %s\n", lua_tostring(l, -1));
  652. goto lua_end;
  653. }
  654. if(!lua_istable(l, -1)) {
  655. log_printf(0, "Lua: A return type of willSendRequestToBbsCgi function should be a table\n");
  656. goto lua_end;
  657. }
  658. lua_pushstring(l, "body");
  659. lua_rawget(l, -2);
  660. if(lua_isstring(l, -1)) {
  661. size_t length;
  662. const char *newBody = lua_tolstring(l, -1, &length);
  663. if(length > content_length) {
  664. postdata = (char *)realloc(postdata, length+1);
  665. }
  666. strcpy(postdata, newBody);
  667. log_printf(1, "Lua: Set request body \"%s\"\n", newBody);
  668. }
  669. lua_pop(l, 1);
  670. lua_pushstring(l, "headers");
  671. lua_rawget(l, -2);
  672. if(lua_istable(l, -1)) {
  673. curl_slist_free_all(headers);
  674. headers = NULL;
  675. lua_pushnil(l);
  676. while(lua_next(l, -2)) {
  677. if(lua_isstring(l, -1) && lua_isstring(l, -2)) {
  678. std::string header = lua_tostring(l, -2);
  679. header += ": ";
  680. header += lua_tostring(l, -1);
  681. headers = curl_slist_append(headers, header.c_str());
  682. log_printf(1, "Lua: Set request header \"%s\"\n", header.c_str());
  683. }
  684. lua_pop(l, 1);
  685. }
  686. }
  687. lua_end:
  688. lua_close(l);
  689. }
  690. #endif
  691. if(!hasExpect) headers = curl_slist_append(headers, "Expect:");
  692. CURL *curl = curl_easy_init();
  693. if(curl) {
  694. CURLcode res;
  695. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  696. curl_easy_setopt(curl, CURLOPT_URL, urlMod);
  697. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  698. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  699. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_proxy);
  700. curl_easy_setopt(curl, CURLOPT_HEADERDATA, this);
  701. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_proxy);
  702. curl_easy_setopt(curl, CURLOPT_WRITEDATA, this);
  703. if(postdata) {
  704. curl_easy_setopt(curl, CURLOPT_POSTFIELDS, postdata);
  705. } else {
  706. if(content_length && isPostRequest) {
  707. /* set Content-Length explicitly via API to work properly with curl >= 7.66.0 */
  708. curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, content_length);
  709. }
  710. curl_easy_setopt(curl, CURLOPT_READFUNCTION, read_callback_proxy);
  711. curl_easy_setopt(curl, CURLOPT_READDATA, this);
  712. }
  713. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  714. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  715. //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
  716. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  717. if(!strncasecmp(protocol,"HTTP/1.0",8) && !strncasecmp(urlMod, "http://", 7)) {
  718. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0);
  719. }
  720. else {
  721. /* force use HTTP 1.1 because CURL_HTTP_VERSION_2TLS is used on curl (w/ nghttp2) >= 7.62.0 */
  722. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  723. }
  724. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
  725. if(user_agent) {
  726. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  727. }
  728. if(isPostRequest) {
  729. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  730. }
  731. else if(!strcasecmp(method, "HEAD")) {
  732. curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
  733. }
  734. if(proxy_server) {
  735. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  736. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  737. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  738. }
  739. res = curl_easy_perform(curl);
  740. if(res != CURLE_OK) {
  741. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),urlMod);
  742. if(!status) sendResponse(503, "Service Unavailable", fpw);
  743. statusCode = 503;
  744. }
  745. else {
  746. if(chunked) {
  747. fprintf(fpw,"0\r\n\r\n");
  748. }
  749. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  750. }
  751. curl_easy_cleanup(curl);
  752. fflush(fpw);
  753. }
  754. curl_slist_free_all(headers);
  755. if(postdata) free(postdata);
  756. }
  757. }
  758. end:
  759. if(statusCode) log_printf(1, "Returned status code %d to client\n",statusCode);
  760. if(buf) free(buf);
  761. if(fpr) fclose(fpr);
  762. if(fpw) fclose(fpw);
  763. close(sock_c);
  764. delete this;
  765. }
  766. int BBS2chProxyConnection::datProxy(const char *url, const char *method)
  767. {
  768. DataStorage *html = NULL;
  769. long statusCode = 0;
  770. long rangeStart = 0, rangeEnd = 0;
  771. time_t lastModified = 0;
  772. time_t ifModifiedSince = 0;
  773. char *buf = (char *)malloc(16384);
  774. char userAgentFromHeader[1024] = "";
  775. if(!buf) goto last;
  776. while(fgets(buf,16384,fpr)) {
  777. //fprintf(stderr,"%s",buf);
  778. if(!strcmp("\r\n",buf)) break;
  779. if(!strncasecmp("Range:",buf,6)) {
  780. if(strstr(buf+7,"bytes=") && !strchr(buf+7, ',')) {
  781. char *ptr = buf+13;
  782. if(*ptr == '-') {
  783. rangeStart = atoi(ptr);
  784. }
  785. else {
  786. rangeStart = strtol(ptr, &ptr, 10);
  787. if(*ptr == '-') ptr++;
  788. if(*ptr && *ptr != '\r') {
  789. rangeEnd = strtol(ptr, NULL, 10);
  790. if(rangeEnd && rangeStart > rangeEnd) {
  791. sendResponse(416, "Requested range not satisfiable", fpw);
  792. statusCode = 416;
  793. goto last;
  794. }
  795. }
  796. }
  797. //fprintf(stderr, "range=%ld-%ld\n",rangeStart,rangeEnd);
  798. }
  799. else {
  800. sendResponse(416, "Requested range not satisfiable", fpw);
  801. statusCode = 416;
  802. goto last;
  803. }
  804. }
  805. else if(!strncasecmp("If-Modified-Since:",buf,18)) {
  806. struct tm time_ = {};
  807. strptime(buf+19,httpTimestampFmt,&time_);
  808. ifModifiedSince = mktime(&time_);
  809. }
  810. else if(!strncasecmp("User-Agent:",buf,11)) {
  811. int i=0;
  812. char *ptr = buf+12;
  813. while(*ptr != '\r' && *ptr != '\n' && i < 1023) userAgentFromHeader[i++] = *ptr++;
  814. userAgentFromHeader[i] = 0;
  815. }
  816. }
  817. if(rangeStart > 0) {
  818. PBBS2chProxyThreadInfo info;
  819. pthread_mutex_lock(mutex);
  820. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  821. if(it != threadCache->end()) {
  822. info = it->second;
  823. }
  824. pthread_mutex_unlock(mutex);
  825. log_printf(5,"range request from %ld bytes\n",rangeStart);
  826. if(info) {
  827. int from = info->lastResNum;
  828. int alreadyRead = info->cachedSize;
  829. int lastResLength = info->cachedData->length;
  830. log_printf(5,"hit %s: cached %d bytes, last res size %d\n",threadKey.c_str(),alreadyRead,lastResLength);
  831. if(rangeStart <= alreadyRead && rangeStart >= alreadyRead - lastResLength) {
  832. CURL *curl = curl_easy_init();
  833. if(curl) {
  834. CURLcode res;
  835. DataStorage *dat = new DataStorage();
  836. log_printf(5,"partial access from res num %d\n",from);
  837. snprintf(buf,16384,"%s%d-n",url,from);
  838. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  839. curl_easy_setopt(curl, CURLOPT_URL, buf);
  840. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  841. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  842. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  843. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  844. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  845. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  846. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  847. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  848. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  849. if(proxy_server) {
  850. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  851. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  852. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  853. }
  854. if(user_agent) {
  855. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  856. }
  857. else if(userAgentFromHeader[0]) {
  858. curl_easy_setopt(curl, CURLOPT_USERAGENT, userAgentFromHeader);
  859. }
  860. res = curl_easy_perform(curl);
  861. if(res == CURLE_OK) {
  862. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  863. curl_easy_cleanup(curl);
  864. if(statusCode == 200 && dat->length) {
  865. DataStorage *updated = html2dat(dat, from, &lastModified, true);
  866. if(ifModifiedSince && lastModified && updated && updated->length == lastResLength) {
  867. struct tm time_ = {};
  868. gmtime_r(&lastModified,&time_);
  869. time_t tmp = mktime(&time_);
  870. if(ifModifiedSince >= tmp) {
  871. sendResponse(304, "Not Modified", fpw);
  872. log_printf(5,"not modified!\n");
  873. delete updated;
  874. delete dat;
  875. statusCode = 304;
  876. goto last;
  877. }
  878. }
  879. if(updated && updated->length && updated->length >= lastResLength) {
  880. html = new DataStorage(alreadyRead - lastResLength);
  881. html->appendBytes(updated->bytes, updated->length);
  882. if(!rangeEnd) rangeEnd = html->length - 1;
  883. if(rangeStart > rangeEnd) {
  884. sendResponse(416, "Requested range not satisfiable", fpw);
  885. delete updated;
  886. delete dat;
  887. statusCode = 416;
  888. goto last;
  889. }
  890. statusCode = 206;
  891. log_printf(5,"cache hit; reconstructed data length:%ld\n",(long)html->length);
  892. }
  893. else {
  894. log_printf(5,"cache misshit?\n");
  895. sendResponse(416, "Requested range not satisfiable", fpw);
  896. delete updated;
  897. delete dat;
  898. statusCode = 416;
  899. goto last;
  900. }
  901. delete updated;
  902. }
  903. }
  904. else {
  905. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),buf);
  906. curl_easy_cleanup(curl);
  907. }
  908. delete dat;
  909. if(html) goto resp;
  910. }
  911. }
  912. else {
  913. log_printf(5,"invalid cache contents\n");
  914. pthread_mutex_lock(mutex);
  915. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  916. if(it != threadCache->end()) {
  917. threadCache->erase(it);
  918. }
  919. pthread_mutex_unlock(mutex);
  920. }
  921. }
  922. }
  923. {
  924. CURL *curl = curl_easy_init();
  925. if(curl) {
  926. CURLcode res;
  927. DataStorage *dat = new DataStorage();
  928. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  929. curl_easy_setopt(curl, CURLOPT_URL, url);
  930. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  931. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  932. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  933. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  934. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  935. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  936. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  937. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  938. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  939. if(proxy_server) {
  940. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  941. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  942. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  943. }
  944. if(user_agent) {
  945. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  946. }
  947. else if(userAgentFromHeader[0]) {
  948. curl_easy_setopt(curl, CURLOPT_USERAGENT, userAgentFromHeader);
  949. }
  950. res = curl_easy_perform(curl);
  951. if(res != CURLE_OK) {
  952. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),url);
  953. sendResponse(503, "Service Unavailable", fpw);
  954. curl_easy_cleanup(curl);
  955. delete dat;
  956. statusCode = 503;
  957. goto last;
  958. }
  959. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  960. curl_easy_cleanup(curl);
  961. if(statusCode == 200) {
  962. html = html2dat(dat, 1, &lastModified, false);
  963. }
  964. delete dat;
  965. }
  966. if(!html || !html->length) {
  967. sendResponse(503, "Service Unavailable", fpw);
  968. statusCode = 503;
  969. goto last;
  970. }
  971. if((rangeStart || rangeEnd) && html && html->length) {
  972. if(!rangeEnd) rangeEnd = html->length - 1;
  973. if(rangeStart < 0) rangeStart = html->length + rangeStart;
  974. if(rangeStart < html->length && rangeEnd < html->length && rangeStart <= rangeEnd) {
  975. statusCode = 206;
  976. }
  977. else {
  978. if(ifModifiedSince && lastModified && rangeStart == html->length) {
  979. struct tm time_ = {};
  980. gmtime_r(&lastModified,&time_);
  981. time_t tmp = mktime(&time_);
  982. if(ifModifiedSince >= tmp) {
  983. sendResponse(304, "Not Modified", fpw);
  984. log_printf(5,"not modified!\n");
  985. statusCode = 304;
  986. goto last;
  987. }
  988. }
  989. sendResponse(416, "Requested range not satisfiable", fpw);
  990. statusCode = 416;
  991. goto last;
  992. }
  993. }
  994. }
  995. resp:
  996. if(statusCode == 206) sendBasicHeaders(statusCode,"Partial Content",fpw);
  997. else sendBasicHeaders(statusCode,"OK",fpw);
  998. if(0 > fprintf(fpw,"Content-Type: text/plain\r\n")) goto last;
  999. if(0 > fprintf(fpw,"Accept-Ranges: bytes\r\n")) goto last;
  1000. if(statusCode == 206) {
  1001. if(0 > fprintf(fpw,"Content-Range: bytes %ld-%ld/%ld\r\n",rangeStart,rangeEnd,(long)html->length)) goto last;
  1002. //fprintf(stderr,"Content-Length: %ld\r\n",rangeEnd - rangeStart + 1);
  1003. //fprintf(stderr,"Content-Range: bytes %ld-%ld/%ld\r\n",rangeStart,rangeEnd,(long)html->length);
  1004. DataStorage *newHtml = new DataStorage();
  1005. newHtml->appendBytes(html->bytes+rangeStart, rangeEnd - rangeStart + 1);
  1006. delete html;
  1007. html = newHtml;
  1008. }
  1009. if(0 > fprintf(fpw,"Content-Length: %ld\r\n",(long)html->length)) goto last;
  1010. if(lastModified) {
  1011. struct tm time_ = {};
  1012. char date[256];
  1013. gmtime_r(&lastModified,&time_);
  1014. strftime(date,256,httpTimestampFmt,&time_);
  1015. if(0 > fprintf(fpw,"Last-Modified: %s\r\n",date)) goto last;
  1016. //fprintf(stderr,"Last-Modified: %s\r\n",date);
  1017. }
  1018. if(0 > fprintf(fpw,"\r\n")) goto last;
  1019. if(html && statusCode >= 200 && statusCode < 300 && strcasecmp(method, "HEAD")) {
  1020. if(html->length > fwrite(html->bytes,1,html->length,fpw)) goto last;
  1021. }
  1022. fflush(fpw);
  1023. last:
  1024. if(buf) free(buf);
  1025. if(html) delete html;
  1026. return statusCode;
  1027. }
  1028. DataStorage *BBS2chProxyConnection::html2dat_old(DataStorage *html, int startResNum, time_t *lastModified, bool useCache)
  1029. {
  1030. char *ptr = html->bytes;
  1031. char *end = html->bytes + html->length - 1;
  1032. DataStorage *txt = new DataStorage();
  1033. int res = startResNum, i=0;
  1034. char signature[32];
  1035. char title[1024];
  1036. int cachedSize = 0;
  1037. bool bbspink = strstr(threadKey.c_str(),"bbspink.com") ? true : false;
  1038. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<title>", 7);
  1039. if(!ptr) {
  1040. delete txt;
  1041. return NULL;
  1042. }
  1043. ptr += 7;
  1044. while(1) {
  1045. if(*ptr == '<') {
  1046. if(!strncasecmp(ptr,"</title>",8)) {
  1047. ptr += 8;
  1048. break;
  1049. }
  1050. else title[i++] = *ptr++;
  1051. }
  1052. else title[i++] = *ptr++;
  1053. }
  1054. title[i] = 0;
  1055. snprintf(signature,32,"<dt>%d ",res);
  1056. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  1057. if(!ptr) {
  1058. delete txt;
  1059. return NULL;
  1060. }
  1061. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  1062. if(!buffer) {
  1063. delete txt;
  1064. return NULL;
  1065. }
  1066. unsigned char *body = buffer;
  1067. char *mail = (char *)body + 65536;
  1068. char *name = mail + 1024;
  1069. char *date = name + 1024;
  1070. char *encrypted = date + 1024;
  1071. while(ptr < end) {
  1072. //fprintf(stderr,"%s\n",signature);
  1073. DataStorage *resData = new DataStorage();
  1074. i=0;
  1075. mail[0] = 0;
  1076. ptr = strstr(ptr,signature);
  1077. ptr += strlen(signature);
  1078. while(*ptr != '<') ptr++;
  1079. ptr++;
  1080. const char *endStr;
  1081. if(*ptr == 'a' || *ptr == 'A') {
  1082. replay:
  1083. // has mail
  1084. while(*ptr != '"') ptr++;
  1085. ptr++;
  1086. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  1087. ptr += 28;
  1088. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  1089. encrypted[i] = 0;
  1090. i = decryptMail((unsigned char *)mail,encrypted);
  1091. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  1092. ptr -= reconstruct_len;
  1093. char *start = ptr;
  1094. memcpy(ptr, "<a href=\"mailto:", 16);
  1095. ptr += 16;
  1096. memcpy(ptr, mail, i);
  1097. ptr = start;
  1098. i=0;
  1099. goto replay;
  1100. }
  1101. else {
  1102. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  1103. while(*ptr != '"') mail[i++] = *ptr++;
  1104. mail[i] = 0;
  1105. }
  1106. endStr = "</a>";
  1107. }
  1108. else if(*ptr == 'b') {
  1109. endStr = NULL;
  1110. }
  1111. else {
  1112. endStr = "</font>";
  1113. }
  1114. if(endStr) {
  1115. ptr = strstr(ptr,"<b>");
  1116. ptr += 3;
  1117. }
  1118. else {
  1119. ptr = strchr(ptr,'>');
  1120. ptr++;
  1121. }
  1122. i=0;
  1123. while(1) {
  1124. if(*ptr == '<') {
  1125. if(!strncasecmp(ptr,"</b>",4) && (!endStr || !strncasecmp(ptr+4,endStr,strlen(endStr)))) {
  1126. ptr += 4;
  1127. if(endStr) ptr += strlen(endStr);
  1128. break;
  1129. }
  1130. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  1131. int j=0;
  1132. ptr = strstr(ptr,"data-cfemail=\"");
  1133. ptr += 14;
  1134. while(*ptr != '"') encrypted[j++] = *ptr++;
  1135. encrypted[j] = 0;
  1136. j = decryptMail((unsigned char *)name+i,encrypted);
  1137. i += j;
  1138. ptr = strstr(ptr,"</script>");
  1139. ptr += 9;
  1140. }
  1141. else name[i++] = *ptr++;
  1142. }
  1143. else name[i++] = *ptr++;
  1144. }
  1145. resData->appendBytes(name, i);
  1146. resData->appendBytes("<>", 2);
  1147. if(mail[0]) resData->appendBytes(mail ,strlen(mail));
  1148. resData->appendBytes("<>", 2);
  1149. ptr += 2;
  1150. i=0;
  1151. while(1) {
  1152. if(*ptr == '<') {
  1153. if(!strncasecmp(ptr,"<dd>",4)) {
  1154. ptr += 4;
  1155. break;
  1156. }
  1157. else if(!strncmp(ptr,"<a href=\"javascript:be(",23)) {
  1158. memcpy(date+i,"BE:",3);
  1159. ptr += 23;
  1160. i += 3;
  1161. while(*ptr != ')') date[i++] = *ptr++;
  1162. date[i++] = '-';
  1163. ptr = strchr(ptr,'?');
  1164. ptr++;
  1165. char *tmp = strstr(ptr,"</a>");
  1166. memcpy(date+i,ptr,tmp-ptr);
  1167. i += tmp-ptr;
  1168. ptr = tmp + 4;
  1169. }
  1170. else date[i++] = *ptr++;
  1171. }
  1172. else date[i++] = *ptr++;
  1173. }
  1174. resData->appendBytes(date ,i);
  1175. resData->appendBytes("<>", 2);
  1176. i=0;
  1177. while(1) {
  1178. if(*ptr == '<') {
  1179. if(!strncasecmp(ptr,"<br><br>\n",9)) {
  1180. ptr += 9;
  1181. break;
  1182. }
  1183. else if(!strncasecmp(ptr,"<dt>",4) || !strncasecmp(ptr,"</dl>",5)) {
  1184. while(i>0 &&body[i-1] == '\n') i--;
  1185. break;
  1186. }
  1187. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  1188. int j=0;
  1189. ptr = strstr(ptr,"data-cfemail=\"");
  1190. ptr += 14;
  1191. while(*ptr != '"') encrypted[j++] = *ptr++;
  1192. encrypted[j] = 0;
  1193. j = decryptMail(body+i,encrypted);
  1194. i += j;
  1195. ptr = strstr(ptr,"</script>");
  1196. ptr += 9;
  1197. }
  1198. else if(!strncmp(ptr,"<a href=\"http",13)) {
  1199. ptr = strchr(ptr,'>');
  1200. ptr++;
  1201. char *link = ptr;
  1202. ptr = strstr(link,"</a>");
  1203. memcpy(body+i,link,ptr-link);
  1204. i += ptr-link;
  1205. ptr += 4;
  1206. }
  1207. else if(!strncmp(ptr,"<img src=\"",10)) {
  1208. ptr += 10;
  1209. char *img = ptr;
  1210. ptr = strstr(img,"\">");
  1211. memcpy(body+i,img,ptr-img);
  1212. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  1213. int length = ptr-img;
  1214. while(*img != '/') {
  1215. img++;
  1216. length--;
  1217. }
  1218. memcpy(body+i,"sssp:",5);
  1219. memcpy(body+i+5,img,length);
  1220. i += length + 5;
  1221. }
  1222. else i += ptr-img;
  1223. ptr += 2;
  1224. }
  1225. else if(!bbspink && !strncmp(ptr,"<br>",4)) {
  1226. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  1227. memcpy(body+i," <br>",5);
  1228. i += 5;
  1229. }
  1230. else {
  1231. memcpy(body+i,"<br>",4);
  1232. i += 4;
  1233. }
  1234. ptr += 4;
  1235. }
  1236. else body[i++] = *ptr++;
  1237. }
  1238. else if(!bbspink && *ptr == ' ') {
  1239. if(*(ptr+1) == ' ') ptr++;
  1240. else body[i++] = *ptr++;
  1241. }
  1242. else body[i++] = *ptr++;
  1243. }
  1244. resData->appendBytes(body ,i);
  1245. resData->appendBytes("<>", 2);
  1246. if(res == 1) resData->appendBytes(title ,strlen(title));
  1247. resData->appendBytes("\n" ,1);
  1248. if(useCache && res == startResNum) {
  1249. PBBS2chProxyThreadInfo info;
  1250. bool hit = false;
  1251. pthread_mutex_lock(mutex);
  1252. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  1253. if(it != threadCache->end()) {
  1254. info = it->second;
  1255. threadCache->erase(it);
  1256. }
  1257. pthread_mutex_unlock(mutex);
  1258. if(info) {
  1259. log_printf(5,"cache hit");
  1260. if(info->cachedData->length == resData->length) {
  1261. log_printf(5,"... size match");
  1262. if(!memcmp(info->cachedData->bytes,resData->bytes,resData->length)) {
  1263. log_printf(5,"... content match");
  1264. hit = true;
  1265. cachedSize = info->cachedSize - resData->length;
  1266. }
  1267. }
  1268. log_printf(5,"\n");
  1269. }
  1270. if(!hit) {
  1271. delete resData;
  1272. free(buffer);
  1273. return NULL;
  1274. }
  1275. }
  1276. txt->appendBytes(resData->bytes, resData->length);
  1277. res++;
  1278. while(*ptr == '\n' || *ptr == '\r') ptr++;
  1279. snprintf(signature,32,"<dt>%d ",res);
  1280. if(!memmem_priv(ptr, end-ptr+1, signature, strlen(signature))) {
  1281. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1282. info->lastResNum = res-1;
  1283. info->cachedSize = txt->length+cachedSize;
  1284. info->cachedData = resData;
  1285. pthread_mutex_lock(mutex);
  1286. threadCache->insert(std::make_pair(threadKey,info));
  1287. pthread_mutex_unlock(mutex);
  1288. log_printf(5,"cached thread %s (%ld bytes)\n",threadKey.c_str(),(long)resData->length);
  1289. if(lastModified) {
  1290. *lastModified = 0;
  1291. char formattedDate[256];
  1292. char *ptr;
  1293. ptr = date;
  1294. int year = strtol(ptr,&ptr,10);
  1295. if(*ptr != '/') break;
  1296. ptr++;
  1297. int month = strtol(ptr,&ptr,10);
  1298. if(*ptr != '/') break;
  1299. ptr++;
  1300. int day = strtol(ptr,&ptr,10);
  1301. if(!*ptr) break;
  1302. while(*ptr != ' ' && *ptr != 0) ptr++;
  1303. if(!*ptr) break;
  1304. ptr++;
  1305. int hour = strtol(ptr,&ptr,10);
  1306. if(*ptr != ':') break;
  1307. ptr++;
  1308. int minutes = strtol(ptr,&ptr,10);
  1309. if(*ptr != ':') break;
  1310. ptr++;
  1311. int seconds = strtol(ptr,&ptr,10);
  1312. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  1313. if(year < 100) year += 2000;
  1314. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  1315. //fprintf(stderr,"%s\n",formattedDate);
  1316. struct tm time = {};
  1317. strptime(formattedDate,threadTimestampFmt,&time);
  1318. *lastModified = mktime(&time);
  1319. //gmtime_r(lastModified,&time);
  1320. //strftime(formattedDate,256,httpTimestampFmt,&time);
  1321. //fprintf(stderr,"%s\n",formattedDate);
  1322. }
  1323. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  1324. break;
  1325. }
  1326. delete resData;
  1327. }
  1328. free(buffer);
  1329. return txt;
  1330. }
  1331. DataStorage *BBS2chProxyConnection::html2dat(DataStorage *html, int startResNum, time_t *lastModified, bool useCache)
  1332. {
  1333. char *ptr = html->bytes;
  1334. char *end = html->bytes + html->length - 1;
  1335. DataStorage *txt = new DataStorage();
  1336. int res = startResNum, i=0;
  1337. char signature[64];
  1338. char title[1024];
  1339. int cachedSize = 0;
  1340. char signatureTag[32];
  1341. char closeTag[32];
  1342. int closeTagLen;
  1343. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<h1 class=\"title\">", 18);
  1344. if(!ptr) {
  1345. delete txt;
  1346. return html2dat_old(html, startResNum, lastModified, useCache);
  1347. }
  1348. else {
  1349. char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, " class=\"post\"", 13);
  1350. if(ptr2) {
  1351. char *tmp = ptr2;
  1352. *ptr2 = 0;
  1353. while(*ptr2 != '<') ptr2--;
  1354. strcpy(signatureTag, ptr2);
  1355. *tmp = ' ';
  1356. }
  1357. else {
  1358. delete txt;
  1359. return NULL;
  1360. }
  1361. /*char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, "<dl class=\"post\"", 16);
  1362. if(ptr2) {
  1363. delete txt;
  1364. return html2dat_pink(html, startResNum, lastModified, useCache);
  1365. }*/
  1366. }
  1367. ptr += 18;
  1368. while(1) {
  1369. if(*ptr == '<') {
  1370. if(!strncasecmp(ptr,"</h1>",5)) {
  1371. ptr += 5;
  1372. break;
  1373. }
  1374. else title[i++] = *ptr++;
  1375. }
  1376. else if(*ptr == '\n') break;
  1377. else title[i++] = *ptr++;
  1378. }
  1379. title[i] = 0;
  1380. snprintf(signature,32,"%s class=\"post\" id=\"%d\"",signatureTag,res);
  1381. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  1382. if(!ptr) {
  1383. delete txt;
  1384. return NULL;
  1385. }
  1386. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  1387. if(!buffer) {
  1388. delete txt;
  1389. return NULL;
  1390. }
  1391. unsigned char *body = buffer;
  1392. char *mail = (char *)body + 65536;
  1393. char *name = mail + 1024;
  1394. char *date = name + 1024;
  1395. char *encrypted = date + 1024;
  1396. while(ptr < end) {
  1397. //fprintf(stderr,"%s\n",signature);
  1398. DataStorage *resData = new DataStorage();
  1399. i=0;
  1400. mail[0] = 0;
  1401. ptr = strstr(ptr," class=\"name\"><b>");
  1402. if(ptr) {
  1403. char *tmp = ptr;
  1404. *ptr = 0;
  1405. while(*ptr != '<') ptr--;
  1406. snprintf(closeTag,32,"</%s>",ptr+1);
  1407. closeTagLen = strlen(closeTag);
  1408. ptr = tmp + 17;
  1409. }
  1410. else {
  1411. delete resData;
  1412. break;
  1413. }
  1414. char endStr[64];
  1415. if(!strncmp(ptr,"<a href=\"mailto:",16)) {
  1416. replay:
  1417. // has mail
  1418. while(*ptr != '"') ptr++;
  1419. ptr++;
  1420. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  1421. ptr += 28;
  1422. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  1423. encrypted[i] = 0;
  1424. i = decryptMail((unsigned char *)mail,encrypted);
  1425. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  1426. ptr -= reconstruct_len;
  1427. char *start = ptr;
  1428. memcpy(ptr, "<a href=\"mailto:", 16);
  1429. ptr += 16;
  1430. memcpy(ptr, mail, i);
  1431. ptr = start;
  1432. i=0;
  1433. goto replay;
  1434. }
  1435. else {
  1436. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  1437. while(1) {
  1438. if(*ptr == '<' && !strncmp(ptr,"<a href=\"",9)) {
  1439. ptr = strchr(ptr,'>');
  1440. ptr++;
  1441. char *link = ptr;
  1442. ptr = strstr(link,"</a>");
  1443. memcpy(mail+i,link,ptr-link);
  1444. i += ptr-link;
  1445. ptr += 4;
  1446. }
  1447. else if(*ptr == '"') break;
  1448. else mail[i++] = *ptr++;
  1449. }
  1450. //while(*ptr != '"') mail[i++] = *ptr++;
  1451. mail[i] = 0;
  1452. }
  1453. snprintf(endStr,64,"</a></b>%s",closeTag);
  1454. while(*ptr != '>') ptr++;
  1455. ptr++;
  1456. }
  1457. /* we do not have to handle this special case because read.cgi on bbspink doesn't
  1458. emit font tags anymore and it conflicts with text decorations using "melon point" */
  1459. /*else if(!strncmp(ptr,"<font",5)) {
  1460. snprintf(endStr,64,"</font></b>%s",closeTag);
  1461. while(*ptr != '>') ptr++;
  1462. ptr++;
  1463. }*/
  1464. else {
  1465. snprintf(endStr,64,"</b>%s",closeTag);
  1466. }
  1467. i=0;
  1468. while(1) {
  1469. if(*ptr == '<') {
  1470. if(!strncmp(ptr,endStr,strlen(endStr))) {
  1471. ptr += strlen(endStr);
  1472. break;
  1473. }
  1474. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  1475. int j=0;
  1476. ptr = strstr(ptr,"data-cfemail=\"");
  1477. ptr += 14;
  1478. while(*ptr != '"') encrypted[j++] = *ptr++;
  1479. encrypted[j] = 0;
  1480. j = decryptMail((unsigned char *)name+i,encrypted);
  1481. i += j;
  1482. ptr = strstr(ptr,"</script>");
  1483. ptr += 9;
  1484. }
  1485. else if(!strncmp(ptr,"<a href=\"",9)) {
  1486. ptr = strchr(ptr,'>');
  1487. ptr++;
  1488. char *link = ptr;
  1489. ptr = strstr(link,"</a>");
  1490. memcpy(name+i,link,ptr-link);
  1491. i += ptr-link;
  1492. ptr += 4;
  1493. }
  1494. else name[i++] = *ptr++;
  1495. }
  1496. else name[i++] = *ptr++;
  1497. }
  1498. resData->appendBytes(name, i);
  1499. resData->appendBytes("<>", 2);
  1500. if(mail[0]) resData->appendBytes(mail ,strlen(mail));
  1501. resData->appendBytes("<>", 2);
  1502. ptr = strstr(ptr," class=\"date\">");
  1503. if(ptr) {
  1504. char *tmp = ptr;
  1505. *ptr = 0;
  1506. while(*ptr != '<') ptr--;
  1507. snprintf(closeTag,32,"</%s>",ptr+1);
  1508. closeTagLen = strlen(closeTag);
  1509. ptr = tmp + 14;
  1510. }
  1511. else {
  1512. delete resData;
  1513. break;
  1514. }
  1515. i=0;
  1516. while(1) {
  1517. if(*ptr == '<') {
  1518. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1519. ptr += closeTagLen;
  1520. break;
  1521. }
  1522. else date[i++] = *ptr++;
  1523. }
  1524. else date[i++] = *ptr++;
  1525. }
  1526. if(!strncmp(ptr,"<div class=\"uid",15) || !strncmp(ptr,"<span class=\"uid",16)) {
  1527. char *tmp = ptr+1;
  1528. while(*ptr != ' ') ptr++;
  1529. *ptr = 0;
  1530. snprintf(closeTag,32,"</%s>",tmp);
  1531. closeTagLen = strlen(closeTag);
  1532. ptr += 11;
  1533. while(*ptr != '>') ptr++;
  1534. ptr++;
  1535. date[i++] = ' ';
  1536. while(1) {
  1537. if(*ptr == '<') {
  1538. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1539. ptr += closeTagLen;
  1540. break;
  1541. }
  1542. else date[i++] = *ptr++;
  1543. }
  1544. else date[i++] = *ptr++;
  1545. }
  1546. }
  1547. if(!strncmp(ptr,"<div class=\"be",14) || !strncmp(ptr,"<span class=\"be",15)) {
  1548. ptr += 14;
  1549. while(*ptr != '>') ptr++;
  1550. ptr++;
  1551. if(!strncmp(ptr,"<a href=\"",9)) {
  1552. ptr += 9;
  1553. while(*ptr != '/' && *ptr != '"') ptr++;
  1554. if(*ptr == '/' && (!strncmp(ptr,"//be.2ch.net/user/",18) || !strncmp(ptr,"//be.5ch.net/user/",18))) {
  1555. memcpy(date+i," BE:",4);
  1556. i += 4;
  1557. ptr += 18;
  1558. while(*ptr != '"') date[i++] = *ptr++;
  1559. date[i++] = '-';
  1560. ptr = strchr(ptr,'?');
  1561. ptr++;
  1562. char *tmp = strstr(ptr,"</a>");
  1563. memcpy(date+i,ptr,tmp-ptr);
  1564. i += tmp-ptr;
  1565. ptr = tmp + 4;
  1566. }
  1567. }
  1568. }
  1569. resData->appendBytes(date ,i);
  1570. resData->appendBytes("<>", 2);
  1571. if(!strcmp(signatureTag,"<div")) {
  1572. ptr = strstr(ptr,"<div class=\"message\">");
  1573. if(!ptr) {
  1574. delete resData;
  1575. break;
  1576. }
  1577. else {
  1578. ptr += 21;
  1579. if(!strncasecmp(ptr,"<span class=\"escaped\">",22)) {
  1580. if(!strncasecmp(ptr+22,"<span class=\"AA\">",17)) {
  1581. strcpy(closeTag,"</span></span></div>");
  1582. closeTagLen = 20;
  1583. ptr += 22+17;
  1584. }
  1585. else {
  1586. strcpy(closeTag,"</span></div>");
  1587. closeTagLen = 13;
  1588. ptr += 22;
  1589. }
  1590. }
  1591. else {
  1592. strcpy(closeTag,"</div>");
  1593. closeTagLen = 6;
  1594. }
  1595. }
  1596. }
  1597. else {
  1598. ptr = strstr(ptr,"<dd class=\"thread_in\">");
  1599. if(!ptr) {
  1600. delete resData;
  1601. break;
  1602. }
  1603. strcpy(closeTag,"</dd>");
  1604. closeTagLen = 5;
  1605. ptr += 22;
  1606. }
  1607. i=0;
  1608. while(1) {
  1609. if(*ptr == '<') {
  1610. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  1611. ptr += closeTagLen;
  1612. break;
  1613. }
  1614. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  1615. int j=0;
  1616. ptr = strstr(ptr,"data-cfemail=\"");
  1617. ptr += 14;
  1618. while(*ptr != '"') encrypted[j++] = *ptr++;
  1619. encrypted[j] = 0;
  1620. j = decryptMail(body+i,encrypted);
  1621. i += j;
  1622. ptr = strstr(ptr,"</script>");
  1623. ptr += 9;
  1624. }
  1625. else if(!strncmp(ptr,"<a ",3)) {
  1626. char *tmp = strchr(ptr,'>');
  1627. char *href = (char *)memmem_priv(ptr,tmp-ptr,"href=\"",6);
  1628. char *link = tmp+1;
  1629. if(href && !strncmp(link,"&gt;&gt;",8) && memmem_priv(href,link-href,"test/read.cgi/",14)) {
  1630. while(ptr < link) {
  1631. if(!strncmp(ptr," class=\"",8)) {
  1632. ptr += 8;
  1633. while(*ptr != '"' && *ptr != '>') ptr++;
  1634. if(*ptr == '"') ptr++;
  1635. }
  1636. else body[i++] = *ptr++;
  1637. }
  1638. }
  1639. else {
  1640. ptr = strstr(link,"</a>");
  1641. memcpy(body+i,link,ptr-link);
  1642. i += ptr-link;
  1643. ptr += 4;
  1644. }
  1645. }
  1646. else if(!strncmp(ptr,"<img src=\"",10)) {
  1647. ptr += 10;
  1648. char *img = ptr;
  1649. ptr = strstr(img,"\">");
  1650. memcpy(body+i,img,ptr-img);
  1651. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  1652. int length = ptr-img;
  1653. while(*img != '/') {
  1654. img++;
  1655. length--;
  1656. }
  1657. memcpy(body+i,"sssp:",5);
  1658. memcpy(body+i+5,img,length);
  1659. i += length + 5;
  1660. }
  1661. else i += ptr-img;
  1662. ptr += 2;
  1663. }
  1664. else if(!strncmp(ptr,"<br>",4)) {
  1665. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  1666. memcpy(body+i," <br>",5);
  1667. i += 5;
  1668. }
  1669. else {
  1670. memcpy(body+i,"<br>",4);
  1671. i += 4;
  1672. }
  1673. ptr += 4;
  1674. }
  1675. else body[i++] = *ptr++;
  1676. }
  1677. else body[i++] = *ptr++;
  1678. }
  1679. resData->appendBytes(body ,i);
  1680. resData->appendBytes("<>", 2);
  1681. if(res == 1) resData->appendBytes(title ,strlen(title));
  1682. resData->appendBytes("\n" ,1);
  1683. if(useCache && res == startResNum) {
  1684. PBBS2chProxyThreadInfo info;
  1685. bool hit = false;
  1686. pthread_mutex_lock(mutex);
  1687. BBS2chProxyThreadCache::iterator it = threadCache->find(threadKey);
  1688. if(it != threadCache->end()) {
  1689. info = it->second;
  1690. threadCache->erase(it);
  1691. }
  1692. pthread_mutex_unlock(mutex);
  1693. if(info) {
  1694. log_printf(5,"cache hit");
  1695. if(info->cachedData->length == resData->length) {
  1696. log_printf(5,"... size match");
  1697. if(!memcmp(info->cachedData->bytes,resData->bytes,resData->length)) {
  1698. log_printf(5,"... content match");
  1699. hit = true;
  1700. cachedSize = info->cachedSize - resData->length;
  1701. }
  1702. }
  1703. log_printf(5,"\n");
  1704. }
  1705. if(!hit) {
  1706. delete resData;
  1707. free(buffer);
  1708. return NULL;
  1709. }
  1710. }
  1711. txt->appendBytes(resData->bytes, resData->length);
  1712. res++;
  1713. while(*ptr == '\n' || *ptr == '\r') ptr++;
  1714. snprintf(signature,64,"%s class=\"post\" id=\"",signatureTag);
  1715. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  1716. if(ptr) {
  1717. int next = atoi(ptr+strlen(signature));
  1718. if(next >= res) {
  1719. while(next > res) {
  1720. txt->appendBytes("broken<><>broken<> broken <>\n", 29);
  1721. res++;
  1722. }
  1723. }
  1724. else ptr = NULL;
  1725. }
  1726. if(!ptr) {
  1727. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1728. info->lastResNum = res-1;
  1729. info->cachedSize = txt->length+cachedSize;
  1730. info->cachedData = resData;
  1731. pthread_mutex_lock(mutex);
  1732. threadCache->insert(std::make_pair(threadKey,info));
  1733. pthread_mutex_unlock(mutex);
  1734. log_printf(5,"cached thread %s (%ld bytes)\n",threadKey.c_str(),(long)resData->length);
  1735. if(lastModified) {
  1736. *lastModified = 0;
  1737. char formattedDate[256];
  1738. char *ptr;
  1739. ptr = date;
  1740. int year = strtol(ptr,&ptr,10);
  1741. if(*ptr != '/') break;
  1742. ptr++;
  1743. int month = strtol(ptr,&ptr,10);
  1744. if(*ptr != '/') break;
  1745. ptr++;
  1746. int day = strtol(ptr,&ptr,10);
  1747. if(!*ptr) break;
  1748. while(*ptr != ' ' && *ptr != 0) ptr++;
  1749. if(!*ptr) break;
  1750. ptr++;
  1751. int hour = strtol(ptr,&ptr,10);
  1752. if(*ptr != ':') break;
  1753. ptr++;
  1754. int minutes = strtol(ptr,&ptr,10);
  1755. if(*ptr != ':') break;
  1756. ptr++;
  1757. int seconds = strtol(ptr,&ptr,10);
  1758. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  1759. if(year < 100) year += 2000;
  1760. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  1761. //fprintf(stderr,"%s\n",formattedDate);
  1762. struct tm time = {};
  1763. strptime(formattedDate,threadTimestampFmt,&time);
  1764. *lastModified = mktime(&time);
  1765. //gmtime_r(lastModified,&time);
  1766. //strftime(formattedDate,256,httpTimestampFmt,&time);
  1767. //fprintf(stderr,"%s\n",formattedDate);
  1768. }
  1769. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  1770. break;
  1771. }
  1772. delete resData;
  1773. }
  1774. free(buffer);
  1775. return txt;
  1776. }
  1777. int BBS2chProxyConnection::datProxyAPI(const char *url, const char *method)
  1778. {
  1779. long statusCode = 0;
  1780. std::string postBody = auth->requestBodyForURL(url);
  1781. if(postBody.empty()) {
  1782. sendResponse(401, "Unauthorized", fpw);
  1783. return 401;
  1784. }
  1785. CURL *curl = curl_easy_init();
  1786. char *buf = (char *)malloc(16384);
  1787. if(curl) {
  1788. CURLcode res;
  1789. struct curl_slist *headers = NULL;
  1790. DataStorage *receivedHeader = new DataStorage();
  1791. DataStorage *receivedBody = new DataStorage();
  1792. while(fgets(buf,16384,fpr)) {
  1793. //fprintf(stderr,"%s",buf);
  1794. if(!strcmp("\r\n",buf)) break;
  1795. else if(!strncasecmp("Range:",buf,6)
  1796. || !strncasecmp("If-Modified-Since:",buf,18)
  1797. || !strncasecmp("Accept-Encoding:",buf,16)) {
  1798. char *ptr;
  1799. ptr = strchr(buf, '\r');
  1800. if(!ptr) ptr = strchr(buf, '\n');
  1801. if(!ptr) continue;
  1802. *ptr = 0;
  1803. headers = curl_slist_append(headers, buf);
  1804. }
  1805. }
  1806. if(x_2ch_ua_dat) headers = curl_slist_append(headers, x_2ch_ua_dat);
  1807. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  1808. curl_easy_setopt(curl, CURLOPT_URL, url);
  1809. curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
  1810. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  1811. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  1812. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  1813. curl_easy_setopt(curl, CURLOPT_WRITEDATA, receivedBody);
  1814. curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_callback_download);
  1815. curl_easy_setopt(curl, CURLOPT_HEADERDATA, receivedHeader);
  1816. curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  1817. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  1818. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  1819. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  1820. if(proxy_server) {
  1821. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  1822. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  1823. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  1824. }
  1825. curl_easy_setopt(curl, CURLOPT_USERAGENT, api_ua_dat?api_ua_dat:"");
  1826. curl_easy_setopt(curl, CURLOPT_POST, 1L);
  1827. #if LIBCURL_VERSION_NUM >= 0x071101
  1828. curl_easy_setopt(curl, CURLOPT_COPYPOSTFIELDS, postBody.c_str());
  1829. #else
  1830. curl_easy_setopt(curl, CURLOPT_POSTFIELDS, postBody.c_str());
  1831. #endif
  1832. //return;
  1833. res = curl_easy_perform(curl);
  1834. if(res == CURLE_OK) {
  1835. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  1836. receivedHeader->appendBytes("",1);
  1837. char *ptr = strstr(receivedHeader->bytes,"\r\n\r\n");
  1838. *(ptr+4) = 0;
  1839. int threadStatus = 0;
  1840. for(char *ptr2 = receivedHeader->bytes; ptr2<ptr;) {
  1841. if(!strncasecmp(ptr2, "Thread-Status:", strlen("Thread-Status:"))) {
  1842. ptr2 += strlen("Thread-Status:");
  1843. while(*ptr2 == ' ') ptr2++;
  1844. threadStatus = atoi(ptr2);
  1845. break;
  1846. }
  1847. ptr2 = strstr(ptr2, "\r\n") + 2;
  1848. }
  1849. if(threadStatus == 1) {
  1850. if(ptr+4-receivedHeader->bytes > fwrite(receivedHeader->bytes,1,ptr+4-receivedHeader->bytes,fpw)) goto last;
  1851. fflush(fpw);
  1852. if(receivedBody->length > fwrite(receivedBody->bytes,1,receivedBody->length,fpw)) goto last;
  1853. fflush(fpw);
  1854. goto last;
  1855. }
  1856. else if(threadStatus == 8) {
  1857. sendBasicHeaders(302, "Found", fpw);
  1858. if(0 > fprintf(fpw, "Location: http://www2.2ch.net/live.html\r\n")) goto last;
  1859. if(0 > fprintf(fpw, "\r\n")) goto last;
  1860. fflush(fpw);
  1861. statusCode = 302;
  1862. goto last;
  1863. }
  1864. else {
  1865. if (statusCode < 400) {
  1866. sendResponse(401, "Unauthorized", fpw);
  1867. statusCode = 401;
  1868. }
  1869. else {
  1870. sendResponse(503, "Service Unavailable", fpw);
  1871. statusCode = 503;
  1872. }
  1873. receivedBody->appendBytes("",1);
  1874. if(!strncasecmp(receivedBody->bytes,"ng (",4)) {
  1875. log_printf(0,"API gateway returned error: %s\n",receivedBody->bytes);
  1876. }
  1877. }
  1878. //fprintf(stderr,"%ld\n",statusCode);
  1879. }
  1880. else {
  1881. log_printf(0,"curl error: %s\n",curl_easy_strerror(res));
  1882. sendResponse(503, "Service Unavailable", fpw);
  1883. statusCode = 503;
  1884. }
  1885. last:
  1886. curl_easy_cleanup(curl);
  1887. curl_slist_free_all(headers);
  1888. delete receivedBody;
  1889. delete receivedHeader;
  1890. }
  1891. free(buf);
  1892. return statusCode;
  1893. }
  1894. int BBS2chProxyConnection::bbsmenuProxy(const char *url, const char *method)
  1895. {
  1896. long statusCode = 0;
  1897. DataStorage *dat = new DataStorage();
  1898. DataStorage *outHTML = new DataStorage();
  1899. char *buf = (char *)malloc(16384);
  1900. char userAgentFromHeader[1024] = "";
  1901. CURL *curl = NULL;
  1902. if(!buf) goto last;
  1903. while(fgets(buf,16384,fpr)) {
  1904. //fprintf(stderr,"%s",buf);
  1905. if(!strcmp("\r\n",buf)) break;
  1906. else if(!strncasecmp("User-Agent:",buf,11)) {
  1907. int i=0;
  1908. char *ptr = buf+12;
  1909. while(*ptr != '\r' && *ptr != '\n' && i < 1023) userAgentFromHeader[i++] = *ptr++;
  1910. userAgentFromHeader[i] = 0;
  1911. }
  1912. }
  1913. curl = curl_easy_init();
  1914. if(curl) {
  1915. CURLcode res;
  1916. if(curl_share) curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
  1917. curl_easy_setopt(curl, CURLOPT_URL, url);
  1918. curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
  1919. curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout);
  1920. curl_easy_setopt(curl, CURLOPT_ENCODING, "");
  1921. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  1922. curl_easy_setopt(curl, CURLOPT_WRITEDATA, dat);
  1923. curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  1924. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
  1925. curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
  1926. if(force_ipv4) curl_easy_setopt(curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  1927. if(proxy_server) {
  1928. curl_easy_setopt(curl, CURLOPT_PROXY, proxy_server);
  1929. curl_easy_setopt(curl, CURLOPT_PROXYPORT, proxy_port);
  1930. curl_easy_setopt(curl, CURLOPT_PROXYTYPE, proxy_type);
  1931. }
  1932. if(user_agent) {
  1933. curl_easy_setopt(curl, CURLOPT_USERAGENT, user_agent);
  1934. }
  1935. else if(userAgentFromHeader[0]) {
  1936. curl_easy_setopt(curl, CURLOPT_USERAGENT, userAgentFromHeader);
  1937. }
  1938. res = curl_easy_perform(curl);
  1939. if(res == CURLE_OK) {
  1940. curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE, &statusCode);
  1941. if(statusCode == 200 && dat->length) {
  1942. dat->appendBytes("",1);
  1943. dat->length--;
  1944. char *ptr = dat->bytes;
  1945. while(*ptr) {
  1946. if(!strncasecmp(ptr,"<a href=",8)) {
  1947. char *start = ptr;
  1948. char *end = strchr(ptr+8,'>');
  1949. ptr = strstr(ptr+8,"://");
  1950. if(ptr && ptr < end) {
  1951. char *protocol = ptr;
  1952. char *end2 = strchr(ptr+3,'/');
  1953. ptr = strstr(protocol+3,"5ch.net");
  1954. if(ptr && ptr < end2 && (*(ptr-1)=='.' || *(ptr-1)=='/')) {
  1955. memcpy(ptr,"2ch",3);
  1956. if(*(protocol-1) == 's') outHTML->appendBytes(start, protocol-start-1);
  1957. else outHTML->appendBytes(start, protocol-start);
  1958. outHTML->appendBytes(protocol, end-protocol);
  1959. ptr = end;
  1960. continue;
  1961. }
  1962. ptr = strstr(protocol+3,"bbspink.com");
  1963. if(ptr && ptr < end2 && (*(ptr-1)=='.' || *(ptr-1)=='/')) {
  1964. if(*(protocol-1) == 's') outHTML->appendBytes(start, protocol-start-1);
  1965. else outHTML->appendBytes(start, protocol-start);
  1966. outHTML->appendBytes(protocol, end-protocol);
  1967. ptr = end;
  1968. continue;
  1969. }
  1970. }
  1971. ptr = start;
  1972. }
  1973. outHTML->appendBytes(ptr++, 1);
  1974. }
  1975. }
  1976. }
  1977. else {
  1978. log_printf(0,"curl error: %s (%s)\n",curl_easy_strerror(res),buf);
  1979. statusCode = 503;
  1980. }
  1981. }
  1982. if(statusCode == 200) {
  1983. sendBasicHeaders(statusCode,"OK",fpw);
  1984. if(0 > fprintf(fpw,"Content-Type: text/html\r\n")) goto last;
  1985. if(0 > fprintf(fpw,"Content-Length: %ld\r\n",(long)outHTML->length)) goto last;
  1986. if(0 > fprintf(fpw,"\r\n")) goto last;
  1987. if(strcasecmp(method, "HEAD")) {
  1988. if(outHTML->length > fwrite(outHTML->bytes,1,outHTML->length,fpw)) goto last;
  1989. }
  1990. fflush(fpw);
  1991. }
  1992. else {
  1993. sendResponse(503, "Service Unavailable", fpw);
  1994. statusCode = 503;
  1995. }
  1996. last:
  1997. if(curl) curl_easy_cleanup(curl);
  1998. if(buf) free(buf);
  1999. if(dat) delete dat;
  2000. if(outHTML) delete outHTML;
  2001. return statusCode;
  2002. }