BBS2chProxyHTML2Dat.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308
  1. #include <sstream>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. #include <unistd.h>
  5. #include "BBS2chProxyHTML2Dat.h"
  6. #include "stringEncodingConverter.h"
  7. extern char *proxy_server;
  8. extern long proxy_port;
  9. extern long proxy_type;
  10. extern long timeout;
  11. extern char *user_agent;
  12. extern int force_ipv4;
  13. extern CURLSH *curl_share;
  14. extern void log_printf(int level, const char *format ...);
  15. extern void *memmem_priv(const void *l, size_t l_len, const void *s, size_t s_len);
  16. #ifdef _WIN32
  17. #define gmtime_r(a, b) gmtime_s(b, a)
  18. #endif
  /* strptime() pattern used when a post timestamp has to be parsed without curl_getdate(). */
  static const char threadTimestampFmt[] = "%Y/%m/%d %H:%M:%S %Z";

  /* Single-character Japanese weekday names, Sunday first
     (presumably indexed like tm_wday -- confirm against the users of this table). */
  static const char *wdays[7] = {
      "日",  /* Sunday    */
      "月",  /* Monday    */
      "火",  /* Tuesday   */
      "水",  /* Wednesday */
      "木",  /* Thursday  */
      "金",  /* Friday    */
      "土"   /* Saturday  */
  };
  /*
   * Decodes a hex string as used by the "/cdn-cgi/l/email-protection#" links and
   * "data-cfemail" attributes handled elsewhere in this file.
   *
   * The first two hex digits of `encrypted` are the XOR key; every following pair
   * of hex digits is one output byte XORed with that key.
   *
   * decrypted: output buffer; must hold at least strlen(encrypted)/2 + 1 bytes.
   *            It is always NUL-terminated on return.
   * encrypted: NUL-terminated hex string (not modified).
   * Returns the number of decoded bytes, not counting the terminator. Inputs
   * shorter than two characters carry no payload and yield 0.
   */
  static int decryptMail(unsigned char *decrypted, char *encrypted)
  {
      unsigned char *out = decrypted;
      const size_t len = strlen(encrypted);

      // Guarding on the length keeps us from touching encrypted[1] when the
      // input is empty (that byte lies beyond the terminator).
      if (len >= 2) {
          char pair[5] = "0x";  // "0x" + two hex digits + NUL
          pair[2] = encrypted[0];
          pair[3] = encrypted[1];
          const unsigned int key = strtol(pair, NULL, 16);

          for (size_t n = 2; n < len; n += 2) {
              pair[2] = encrypted[n];
              // For an odd-length input this is the terminating NUL, in which
              // case strtol() simply parses the single remaining digit.
              pair[3] = encrypted[n + 1];
              const unsigned int value = strtol(pair, NULL, 16);
              *out++ = static_cast<unsigned char>(value ^ key);
          }
      }

      *out = 0;
      return static_cast<int>(out - decrypted);
  }
  /*
   * Replaces every occurrence of `oldValue` inside `input` with `newValue`, in place.
   * Text inserted by a replacement is never rescanned, so a `newValue` that contains
   * `oldValue` does not loop forever. An empty `oldValue` leaves `input` unchanged.
   */
  static void replaceAll(std::string &input, const std::string &oldValue, const std::string &newValue)
  {
      if (oldValue.empty()) return;

      const std::string::size_type oldLen = oldValue.size();
      const std::string::size_type newLen = newValue.size();
      for (std::string::size_type at = input.find(oldValue);
           at != std::string::npos;
           at = input.find(oldValue, at + newLen)) {
          input.replace(at, oldLen, newValue);
      }
  }
  58. static void escapeForHTML(std::string &input)
  59. {
  60. replaceAll(input, "&", "&amp;");
  61. replaceAll(input, "<", "&lt;");
  62. replaceAll(input, ">", "&gt;");
  63. replaceAll(input, "\"", "&quot;");
  64. replaceAll(input, "'", "&#39;");
  65. }
  /*
   * libcurl write callback: appends the received chunk to the std::vector<char>
   * passed through `userdata` and reports the whole chunk as consumed.
   */
  static size_t write_callback_download(char *buffer, size_t size, size_t nitems, void *userdata)
  {
      const size_t byteCount = size * nitems;
      std::vector<char> &sink = *static_cast<std::vector<char> *>(userdata);
      sink.insert(sink.end(), buffer, buffer + byteCount);
      return byteCount;
  }
  73. BBS2chProxyHTML2Dat5ch::BBS2chProxyHTML2Dat5ch(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, bool useHttps, CURL *curl)
  74. : IBBS2chProxyHTML2Dat(cache, identifier, curl)
  75. {
  76. _url = useHttps ? "https://" : "http://";
  77. _url += identifier.host;
  78. _url += "/test/read.cgi/";
  79. _url += identifier.board;
  80. _url += '/';
  81. _url += identifier.key;
  82. _url += '/';
  83. }
  84. BBS2chProxyHTML2DatTalk::BBS2chProxyHTML2DatTalk(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, CURL *curl)
  85. : IBBS2chProxyHTML2Dat(cache, identifier, curl), _cachedJson(NULL)
  86. {
  87. _url = "https://talk.jp/api/boards/";
  88. _url += identifier.board;
  89. _url += "/threads/";
  90. _url += identifier.key;
  91. }
  92. BBS2chProxyHTML2DatTalkHTML::BBS2chProxyHTML2DatTalkHTML(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, CURL *curl)
  93. : BBS2chProxyHTML2DatTalk(cache, identifier, curl)
  94. {
  95. _url = "https://talk.jp/boards/";
  96. _url += identifier.board;
  97. _url += '/';
  98. _url += identifier.key;
  99. _url += '/';
  100. }
  101. BBS2chProxyHTML2Dat5chItest::BBS2chProxyHTML2Dat5chItest(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, CURL *curl)
  102. : BBS2chProxyHTML2DatTalk(cache, identifier, curl)
  103. {
  104. _url = "https://itest.5ch.net/public/newapi/client.php?subdomain=";
  105. _url += identifier.hostPrefix;
  106. _url += "&board=";
  107. _url += identifier.board;
  108. _url += "&dat=";
  109. _url += identifier.key;
  110. }
  111. std::vector<char> IBBS2chProxyHTML2Dat::getHtmlFromURL(const std::string &url, long *outStatusCode)
  112. {
  113. CURLcode res;
  114. long statusCode = 0;
  115. std::vector<char> html;
  116. if (curl_share) curl_easy_setopt(_curl, CURLOPT_SHARE, curl_share);
  117. curl_easy_setopt(_curl, CURLOPT_URL, url.c_str());
  118. curl_easy_setopt(_curl, CURLOPT_NOSIGNAL, 1L);
  119. curl_easy_setopt(_curl, CURLOPT_TIMEOUT, timeout);
  120. curl_easy_setopt(_curl, CURLOPT_ENCODING, "");
  121. curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  122. curl_easy_setopt(_curl, CURLOPT_WRITEDATA, &html);
  123. curl_easy_setopt(_curl, CURLOPT_FOLLOWLOCATION, 1L);
  124. curl_easy_setopt(_curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
  125. curl_easy_setopt(_curl, CURLOPT_SSL_VERIFYHOST, 0L);
  126. curl_easy_setopt(_curl, CURLOPT_SSL_VERIFYPEER, 0L);
  127. if (force_ipv4) curl_easy_setopt(_curl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
  128. if (proxy_server) {
  129. curl_easy_setopt(_curl, CURLOPT_PROXY, proxy_server);
  130. curl_easy_setopt(_curl, CURLOPT_PROXYPORT, proxy_port);
  131. curl_easy_setopt(_curl, CURLOPT_PROXYTYPE, proxy_type);
  132. }
  133. if (user_agent) {
  134. curl_easy_setopt(_curl, CURLOPT_USERAGENT, user_agent);
  135. }
  136. else if (!_userAgent.empty()) {
  137. curl_easy_setopt(_curl, CURLOPT_USERAGENT, _userAgent.c_str());
  138. }
  139. res = curl_easy_perform(_curl);
  140. if (res == CURLE_OK) {
  141. curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &statusCode);
  142. if (statusCode != 200) html.clear();
  143. } else {
  144. log_printf(0, "curl error: %s (%s)\n", curl_easy_strerror(res), url.c_str());
  145. }
  146. curl_easy_reset(_curl);
  147. if (outStatusCode) *outStatusCode = statusCode;
  148. return html;
  149. }
  150. void IBBS2chProxyHTML2Dat::setRequestHeaders(BBS2chProxyHttpHeaders &headers)
  151. {
  152. if (headers.has("User-Agent")) {
  153. _userAgent = headers.get("User-Agent");
  154. }
  155. }
  156. const std::string& IBBS2chProxyHTML2Dat::getKey()
  157. {
  158. return _threadKey;
  159. }
  160. std::string BBS2chProxyHTML2Dat5ch::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
  161. {
  162. std::string tmpURL(_url);
  163. if (startFrom > 1) {
  164. std::ostringstream ss;
  165. ss << startFrom << "-n";
  166. tmpURL += ss.str();
  167. } else {
  168. tmpURL += "1-";
  169. }
  170. curl_easy_setopt(_curl, CURLOPT_COOKIE, "5chClassic=on");
  171. std::vector<char> html = getHtmlFromURL(tmpURL, outStatusCode);
  172. return html2dat(html, startFrom, lastModifiedOut, useCache);
  173. }
  174. std::string BBS2chProxyHTML2DatTalk::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
  175. {
  176. if (!_cachedJson) {
  177. std::vector<char> json = getHtmlFromURL(_url, outStatusCode);
  178. if (json.empty()) return "";
  179. json.push_back(0);
  180. _cachedJson = json_parse_string(&json.front());
  181. }
  182. if (!_cachedJson) return "";
  183. return json2dat(_cachedJson, startFrom, lastModifiedOut, useCache);
  184. }
  185. std::string BBS2chProxyHTML2DatTalkHTML::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
  186. {
  187. if (!_cachedJson) {
  188. std::vector<char> html = getHtmlFromURL(_url, outStatusCode);
  189. if (html.empty()) return "";
  190. html.push_back(0);
  191. const char *ptr = strstr(&html.front(), "id=\"__NEXT_DATA__\"");
  192. if (ptr) {
  193. ptr += strlen("id=\"__NEXT_DATA__\"");
  194. while (*ptr != '>' && *ptr != 0) ptr++;
  195. if (*ptr) {
  196. const char *end = strstr(++ptr, "</script>");
  197. if (end) {
  198. std::string jsonStr(ptr, end-ptr);
  199. _cachedJson = json_parse_string(jsonStr.c_str());
  200. }
  201. }
  202. }
  203. }
  204. if (!_cachedJson) return "";
  205. JSON_Value *threadData = json_object_dotget_value(json_object(_cachedJson), "props.pageProps.threadData");
  206. return json2dat(threadData, startFrom, lastModifiedOut, useCache);
  207. }
  208. std::string BBS2chProxyHTML2Dat5chItest::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
  209. {
  210. if (!_cachedJson) {
  211. std::vector<char> json = getHtmlFromURL(_url, outStatusCode);
  212. if (json.empty()) return "";
  213. json.push_back(0);
  214. _cachedJson = json_parse_string(&json.front());
  215. }
  216. if (!_cachedJson) return "";
  217. return json2dat(_cachedJson, startFrom, lastModifiedOut, useCache);
  218. }
  219. std::string BBS2chProxyHTML2Dat5ch::html2dat_old(std::vector<char> &html, int startResNum, time_t *lastModified, bool useCache)
  220. {
  221. char *ptr = &html.front();
  222. char *end = &html.back();
  223. std::string txt;
  224. int res = startResNum, i=0;
  225. char signature[32];
  226. char title[1024];
  227. int cachedSize = 0;
  228. bool bbspink = strstr(_threadKey.c_str(),"bbspink.com") ? true : false;
  229. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<title>", 7);
  230. if(!ptr) {
  231. return "";
  232. }
  233. ptr += 7;
  234. while(1) {
  235. if(*ptr == '<') {
  236. if(!strncasecmp(ptr,"</title>",8)) {
  237. ptr += 8;
  238. break;
  239. }
  240. else title[i++] = *ptr++;
  241. }
  242. else title[i++] = *ptr++;
  243. }
  244. title[i] = 0;
  245. snprintf(signature,32,"<dt>%d ",res);
  246. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  247. if(!ptr) {
  248. return "";
  249. }
  250. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  251. if(!buffer) {
  252. return "";
  253. }
  254. unsigned char *body = buffer;
  255. char *mail = (char *)body + 65536;
  256. char *name = mail + 1024;
  257. char *date = name + 1024;
  258. char *encrypted = date + 1024;
  259. while(ptr < end) {
  260. //fprintf(stderr,"%s\n",signature);
  261. std::string resData;
  262. i=0;
  263. mail[0] = 0;
  264. ptr = strstr(ptr,signature);
  265. ptr += strlen(signature);
  266. while(*ptr != '<') ptr++;
  267. ptr++;
  268. const char *endStr;
  269. if(*ptr == 'a' || *ptr == 'A') {
  270. replay:
  271. // has mail
  272. while(*ptr != '"') ptr++;
  273. ptr++;
  274. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  275. ptr += 28;
  276. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  277. encrypted[i] = 0;
  278. i = decryptMail((unsigned char *)mail,encrypted);
  279. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  280. ptr -= reconstruct_len;
  281. char *start = ptr;
  282. memcpy(ptr, "<a href=\"mailto:", 16);
  283. ptr += 16;
  284. memcpy(ptr, mail, i);
  285. ptr = start;
  286. i=0;
  287. goto replay;
  288. }
  289. else {
  290. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  291. while(*ptr != '"') mail[i++] = *ptr++;
  292. mail[i] = 0;
  293. }
  294. endStr = "</a>";
  295. }
  296. else if(*ptr == 'b') {
  297. endStr = NULL;
  298. }
  299. else {
  300. endStr = "</font>";
  301. }
  302. if(endStr) {
  303. ptr = strstr(ptr,"<b>");
  304. ptr += 3;
  305. }
  306. else {
  307. ptr = strchr(ptr,'>');
  308. ptr++;
  309. }
  310. i=0;
  311. while(1) {
  312. if(*ptr == '<') {
  313. if(!strncasecmp(ptr,"</b>",4) && (!endStr || !strncasecmp(ptr+4,endStr,strlen(endStr)))) {
  314. ptr += 4;
  315. if(endStr) ptr += strlen(endStr);
  316. break;
  317. }
  318. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  319. int j=0;
  320. ptr = strstr(ptr,"data-cfemail=\"");
  321. ptr += 14;
  322. while(*ptr != '"') encrypted[j++] = *ptr++;
  323. encrypted[j] = 0;
  324. j = decryptMail((unsigned char *)name+i,encrypted);
  325. i += j;
  326. ptr = strstr(ptr,"</script>");
  327. ptr += 9;
  328. }
  329. else name[i++] = *ptr++;
  330. }
  331. else name[i++] = *ptr++;
  332. }
  333. resData.append(name, i);
  334. resData.append("<>");
  335. if(mail[0]) resData.append(mail);
  336. resData.append("<>");
  337. ptr += 2;
  338. i=0;
  339. while(1) {
  340. if(*ptr == '<') {
  341. if(!strncasecmp(ptr,"<dd>",4)) {
  342. ptr += 4;
  343. break;
  344. }
  345. else if(!strncmp(ptr,"<a href=\"javascript:be(",23)) {
  346. memcpy(date+i,"BE:",3);
  347. ptr += 23;
  348. i += 3;
  349. while(*ptr != ')') date[i++] = *ptr++;
  350. date[i++] = '-';
  351. ptr = strchr(ptr,'?');
  352. ptr++;
  353. char *tmp = strstr(ptr,"</a>");
  354. memcpy(date+i,ptr,tmp-ptr);
  355. i += tmp-ptr;
  356. ptr = tmp + 4;
  357. }
  358. else date[i++] = *ptr++;
  359. }
  360. else date[i++] = *ptr++;
  361. }
  362. resData.append(date, i);
  363. resData.append("<>");
  364. i=0;
  365. while(1) {
  366. if(*ptr == '<') {
  367. if(!strncasecmp(ptr,"<br><br>\n",9)) {
  368. ptr += 9;
  369. break;
  370. }
  371. else if(!strncasecmp(ptr,"<dt>",4) || !strncasecmp(ptr,"</dl>",5)) {
  372. while(i>0 &&body[i-1] == '\n') i--;
  373. break;
  374. }
  375. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  376. int j=0;
  377. ptr = strstr(ptr,"data-cfemail=\"");
  378. ptr += 14;
  379. while(*ptr != '"') encrypted[j++] = *ptr++;
  380. encrypted[j] = 0;
  381. j = decryptMail(body+i,encrypted);
  382. i += j;
  383. ptr = strstr(ptr,"</script>");
  384. ptr += 9;
  385. }
  386. else if(!strncmp(ptr,"<a href=\"http",13)) {
  387. ptr = strchr(ptr,'>');
  388. ptr++;
  389. char *link = ptr;
  390. ptr = strstr(link,"</a>");
  391. memcpy(body+i,link,ptr-link);
  392. i += ptr-link;
  393. ptr += 4;
  394. }
  395. else if(!strncmp(ptr,"<img src=\"",10)) {
  396. ptr += 10;
  397. char *img = ptr;
  398. ptr = strstr(img,"\">");
  399. memcpy(body+i,img,ptr-img);
  400. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  401. int length = ptr-img;
  402. while(*img != '/') {
  403. img++;
  404. length--;
  405. }
  406. memcpy(body+i,"sssp:",5);
  407. memcpy(body+i+5,img,length);
  408. i += length + 5;
  409. }
  410. else i += ptr-img;
  411. ptr += 2;
  412. }
  413. else if(!bbspink && !strncmp(ptr,"<br>",4)) {
  414. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  415. memcpy(body+i," <br>",5);
  416. i += 5;
  417. }
  418. else {
  419. memcpy(body+i,"<br>",4);
  420. i += 4;
  421. }
  422. ptr += 4;
  423. }
  424. else body[i++] = *ptr++;
  425. }
  426. else if(!bbspink && *ptr == ' ') {
  427. if(*(ptr+1) == ' ') ptr++;
  428. else body[i++] = *ptr++;
  429. }
  430. else body[i++] = *ptr++;
  431. }
  432. resData.append((const char *)body ,i);
  433. resData.append("<>");
  434. if(res == 1) resData.append(title);
  435. resData.append("\n");
  436. if(useCache && res == startResNum) {
  437. PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
  438. bool hit = false;
  439. if(info) {
  440. log_printf(5,"cache hit");
  441. if(info->cachedData.size() == resData.size()) {
  442. log_printf(5,"... size match");
  443. if(info->cachedData == resData) {
  444. log_printf(5,"... content match");
  445. hit = true;
  446. cachedSize = info->cachedSize - resData.size();
  447. }
  448. }
  449. log_printf(5,"\n");
  450. }
  451. if(!hit) {
  452. free(buffer);
  453. return "";
  454. }
  455. }
  456. txt += resData;
  457. res++;
  458. while(*ptr == '\n' || *ptr == '\r') ptr++;
  459. snprintf(signature,32,"<dt>%d ",res);
  460. if(!memmem_priv(ptr, end-ptr+1, signature, strlen(signature))) {
  461. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  462. info->lastResNum = res-1;
  463. info->cachedSize = txt.size()+cachedSize;
  464. info->cachedData = resData;
  465. _threadCache->set(_threadKey, info);
  466. log_printf(5,"cached thread %s (%ld bytes)\n",_threadKey.c_str(),resData.size());
  467. if(lastModified) {
  468. *lastModified = 0;
  469. char formattedDate[256];
  470. char *ptr;
  471. ptr = date;
  472. int year = strtol(ptr,&ptr,10);
  473. if(*ptr != '/') break;
  474. ptr++;
  475. int month = strtol(ptr,&ptr,10);
  476. if(*ptr != '/') break;
  477. ptr++;
  478. int day = strtol(ptr,&ptr,10);
  479. if(!*ptr) break;
  480. while(*ptr != ' ' && *ptr != 0) ptr++;
  481. if(!*ptr) break;
  482. ptr++;
  483. int hour = strtol(ptr,&ptr,10);
  484. if(*ptr != ':') break;
  485. ptr++;
  486. int minutes = strtol(ptr,&ptr,10);
  487. if(*ptr != ':') break;
  488. ptr++;
  489. int seconds = strtol(ptr,&ptr,10);
  490. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  491. if(year < 100) year += 2000;
  492. #if LIBCURL_VERSION_NUM >= 0x070c02 /* curl 7.12.2 or later */
  493. snprintf(formattedDate, 256, "%d%02d%02d %02d:%02d:%02d +0900", year, month, day, hour, minutes, seconds);
  494. *lastModified = curl_getdate(formattedDate, NULL);
  495. #else
  496. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  497. struct tm time = {0};
  498. strptime(formattedDate,threadTimestampFmt,&time);
  499. *lastModified = mktime(&time);
  500. #endif
  501. }
  502. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  503. break;
  504. }
  505. }
  506. free(buffer);
  507. return txt;
  508. }
  509. std::string BBS2chProxyHTML2Dat5ch::html2dat(std::vector<char> &html, int startResNum, time_t *lastModified, bool useCache)
  510. {
  511. char *ptr = &html.front();
  512. char *end = &html.back();
  513. std::string txt;
  514. int res = startResNum, i=0;
  515. char signature[64];
  516. char title[1024];
  517. int cachedSize = 0;
  518. char signatureTag[32];
  519. char closeTag[48];
  520. int closeTagLen;
  521. bool isNewHTML = false;
  522. if (html.empty()) return "";
  523. ptr = (char *)memmem_priv(ptr, end-ptr+1, " id=\"threadtitle\">", 18);
  524. if (ptr) {
  525. isNewHTML = true;
  526. char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, "<article id=\"", 13);
  527. if (!ptr2) {
  528. strcpy(signatureTag, "div");
  529. } else {
  530. strcpy(signatureTag, "article");
  531. }
  532. const char *tmp = ptr;
  533. while (*tmp != '<') tmp--;
  534. memcpy(closeTag+2, tmp+1, ptr-tmp-1);
  535. closeTag[0] = '<';
  536. closeTag[1] = '/';
  537. closeTag[ptr-tmp+1] = '>';
  538. closeTag[ptr-tmp+2] = 0;
  539. ptr += 18;
  540. while (1) {
  541. if (*ptr == '<') {
  542. if (!strncasecmp(ptr, closeTag, strlen(closeTag))) {
  543. ptr += strlen(closeTag);
  544. break;
  545. }
  546. else title[i++] = *ptr++;
  547. }
  548. else if(*ptr == '\n') break;
  549. else title[i++] = *ptr++;
  550. }
  551. title[i] = 0;
  552. snprintf(signature, 32, "<%s id=\"%d\"", signatureTag, res);
  553. }
  554. else {
  555. ptr = &html.front();
  556. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<h1 class=\"title\">", 18);
  557. if(!ptr) {
  558. return html2dat_old(html, startResNum, lastModified, useCache);
  559. }
  560. else {
  561. char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, " class=\"post\"", 13);
  562. if(ptr2) {
  563. char *tmp = ptr2;
  564. *ptr2 = 0;
  565. while(*ptr2 != '<') ptr2--;
  566. strcpy(signatureTag, ptr2);
  567. *tmp = ' ';
  568. }
  569. else {
  570. return "";
  571. }
  572. /*char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, "<dl class=\"post\"", 16);
  573. if(ptr2) {
  574. return html2dat_pink(html, startResNum, lastModified, useCache);
  575. }*/
  576. }
  577. ptr += 18;
  578. while(1) {
  579. if(*ptr == '<') {
  580. if(!strncasecmp(ptr,"</h1>",5)) {
  581. ptr += 5;
  582. break;
  583. }
  584. else title[i++] = *ptr++;
  585. }
  586. else if(*ptr == '\n') break;
  587. else title[i++] = *ptr++;
  588. }
  589. title[i] = 0;
  590. snprintf(signature,32,"%s class=\"post\" id=\"%d\"",signatureTag,res);
  591. }
  592. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  593. if(!ptr) {
  594. return "";
  595. }
  596. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  597. if(!buffer) {
  598. return "";
  599. }
  600. unsigned char *body = buffer;
  601. char *mail = (char *)body + 65536;
  602. char *name = mail + 1024;
  603. char *date = name + 1024;
  604. char *encrypted = date + 1024;
  605. while(ptr < end) {
  606. //fprintf(stderr,"%s\n",signature);
  607. std::string resData;
  608. i=0;
  609. mail[0] = 0;
  610. if (isNewHTML) ptr = strstr(ptr," class=\"postusername\"><b>");
  611. else ptr = strstr(ptr," class=\"name\"><b>");
  612. if(ptr) {
  613. char *tmp = ptr;
  614. *ptr = 0;
  615. while(*ptr != '<') ptr--;
  616. snprintf(closeTag, 48, "</%s>", ptr+1);
  617. closeTagLen = strlen(closeTag);
  618. if (isNewHTML) ptr = tmp + 25;
  619. else ptr = tmp + 17;
  620. }
  621. else {
  622. break;
  623. }
  624. char endStr[64];
  625. if(!strncmp(ptr,"<a ", 3)) {
  626. char *tmp = ptr;
  627. while (*tmp != '>') tmp++;
  628. ptr = (char *)memmem_priv(ptr, tmp-ptr, "href=\"", 6);
  629. if (!ptr) {
  630. ptr = tmp;
  631. goto mailEnd;
  632. }
  633. replay:
  634. // has mail
  635. while(*ptr != '"') ptr++;
  636. ptr++;
  637. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  638. ptr += 28;
  639. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  640. encrypted[i] = 0;
  641. i = decryptMail((unsigned char *)mail,encrypted);
  642. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  643. ptr -= reconstruct_len;
  644. char *start = ptr;
  645. memcpy(ptr, "<a href=\"mailto:", 16);
  646. ptr += 16;
  647. memcpy(ptr, mail, i);
  648. ptr = start;
  649. i=0;
  650. goto replay;
  651. }
  652. else {
  653. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  654. while(1) {
  655. if(*ptr == '<' && !strncmp(ptr,"<a href=\"",9)) {
  656. ptr = strchr(ptr,'>');
  657. ptr++;
  658. char *link = ptr;
  659. ptr = strstr(link,"</a>");
  660. memcpy(mail+i,link,ptr-link);
  661. i += ptr-link;
  662. ptr += 4;
  663. }
  664. else if(*ptr == '"') break;
  665. else mail[i++] = *ptr++;
  666. }
  667. //while(*ptr != '"') mail[i++] = *ptr++;
  668. mail[i] = 0;
  669. }
  670. mailEnd:
  671. snprintf(endStr,64,"</a></b>%s",closeTag);
  672. while(*ptr != '>') ptr++;
  673. ptr++;
  674. }
  675. /* we do not have to handle this special case because read.cgi on bbspink doesn't
  676. emit font tags anymore and it conflicts with text decorations using "melon point" */
  677. /*else if(!strncmp(ptr,"<font",5)) {
  678. snprintf(endStr,64,"</font></b>%s",closeTag);
  679. while(*ptr != '>') ptr++;
  680. ptr++;
  681. }*/
  682. else {
  683. snprintf(endStr,64,"</b>%s",closeTag);
  684. }
  685. i=0;
  686. while(1) {
  687. if(*ptr == '<') {
  688. if(!strncmp(ptr,endStr,strlen(endStr))) {
  689. ptr += strlen(endStr);
  690. break;
  691. }
  692. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  693. int j=0;
  694. ptr = strstr(ptr,"data-cfemail=\"");
  695. ptr += 14;
  696. while(*ptr != '"') encrypted[j++] = *ptr++;
  697. encrypted[j] = 0;
  698. j = decryptMail((unsigned char *)name+i,encrypted);
  699. i += j;
  700. ptr = strstr(ptr,"</script>");
  701. ptr += 9;
  702. }
  703. else if(!strncmp(ptr,"<a href=\"",9)) {
  704. ptr = strchr(ptr,'>');
  705. ptr++;
  706. char *link = ptr;
  707. ptr = strstr(link,"</a>");
  708. memcpy(name+i,link,ptr-link);
  709. i += ptr-link;
  710. ptr += 4;
  711. }
  712. else name[i++] = *ptr++;
  713. }
  714. else name[i++] = *ptr++;
  715. }
  716. resData.append(name, i);
  717. resData.append("<>");
  718. if(mail[0]) resData.append(mail);
  719. resData.append("<>");
  720. ptr = strstr(ptr," class=\"date\">");
  721. if(ptr) {
  722. char *tmp = ptr;
  723. *ptr = 0;
  724. while(*ptr != '<') ptr--;
  725. snprintf(closeTag, 48, "</%s>", ptr+1);
  726. closeTagLen = strlen(closeTag);
  727. ptr = tmp + 14;
  728. }
  729. else {
  730. break;
  731. }
  732. i=0;
  733. while(1) {
  734. if(*ptr == '<') {
  735. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  736. ptr += closeTagLen;
  737. break;
  738. }
  739. else date[i++] = *ptr++;
  740. }
  741. else date[i++] = *ptr++;
  742. }
  743. if(!strncmp(ptr,"<div class=\"uid",15) || !strncmp(ptr,"<span class=\"uid",16)) {
  744. char *tmp = ptr+1;
  745. while(*ptr != ' ') ptr++;
  746. *ptr = 0;
  747. snprintf(closeTag, 48, "</%s>", tmp);
  748. closeTagLen = strlen(closeTag);
  749. ptr += 11;
  750. while(*ptr != '>') ptr++;
  751. ptr++;
  752. date[i++] = ' ';
  753. while(1) {
  754. if(*ptr == '<') {
  755. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  756. ptr += closeTagLen;
  757. break;
  758. }
  759. else date[i++] = *ptr++;
  760. }
  761. else date[i++] = *ptr++;
  762. }
  763. }
  764. if (isNewHTML && !strncmp(ptr, "</span>", 7)) ptr += 7;
  765. if(!strncmp(ptr,"<div class=\"be",14) || !strncmp(ptr,"<span class=\"be",15)) {
  766. ptr += 14;
  767. while(*ptr != '>') ptr++;
  768. ptr++;
  769. if(!strncmp(ptr,"<a href=\"",9)) {
  770. ptr += 9;
  771. while(*ptr != '/' && *ptr != '"') ptr++;
  772. if(*ptr == '/' && (!strncmp(ptr,"//be.2ch.net/user/",18) || !strncmp(ptr,"//be.5ch.net/user/",18))) {
  773. memcpy(date+i," BE:",4);
  774. i += 4;
  775. ptr += 18;
  776. while(*ptr != '"') date[i++] = *ptr++;
  777. date[i++] = '-';
  778. ptr = strchr(ptr,'?');
  779. ptr++;
  780. char *tmp = strstr(ptr,"</a>");
  781. memcpy(date+i,ptr,tmp-ptr);
  782. i += tmp-ptr;
  783. ptr = tmp + 4;
  784. }
  785. }
  786. }
  787. resData.append(date, i);
  788. resData.append("<>");
  789. if (isNewHTML) {
  790. ptr = strstr(ptr," class=\"post-content\">");
  791. if (!ptr) {
  792. break;
  793. }
  794. else {
  795. char *tmp = ptr;
  796. char postContentTag[32];
  797. while (*tmp != '<') tmp--;
  798. memcpy(postContentTag, tmp+1, ptr-tmp-1);
  799. postContentTag[ptr-tmp-1] = 0;
  800. ptr += 22;
  801. if (!strncasecmp(ptr, "<span class=\"AA\">", 17)) {
  802. snprintf(closeTag, 48, "</span></%s>", postContentTag);
  803. closeTagLen = strlen(closeTag);
  804. ptr += 17;
  805. }
  806. else {
  807. snprintf(closeTag, 48, "</%s>", postContentTag);
  808. closeTagLen = strlen(closeTag);
  809. }
  810. }
  811. }
  812. else if(!strcmp(signatureTag,"<div")) {
  813. ptr = strstr(ptr,"<div class=\"message\">");
  814. if(!ptr) {
  815. break;
  816. }
  817. else {
  818. ptr += 21;
  819. if(!strncasecmp(ptr,"<span class=\"escaped\">",22)) {
  820. if(!strncasecmp(ptr+22,"<span class=\"AA\">",17)) {
  821. strcpy(closeTag,"</span></span></div>");
  822. closeTagLen = 20;
  823. ptr += 22+17;
  824. }
  825. else {
  826. strcpy(closeTag,"</span></div>");
  827. closeTagLen = 13;
  828. ptr += 22;
  829. }
  830. }
  831. else {
  832. strcpy(closeTag,"</div>");
  833. closeTagLen = 6;
  834. }
  835. }
  836. }
  837. else {
  838. ptr = strstr(ptr,"<dd class=\"thread_in\">");
  839. if(!ptr) {
  840. break;
  841. }
  842. strcpy(closeTag,"</dd>");
  843. closeTagLen = 5;
  844. ptr += 22;
  845. }
  846. i=0;
  847. while(1) {
  848. if(*ptr == '<') {
  849. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  850. ptr += closeTagLen;
  851. break;
  852. }
  853. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  854. int j=0;
  855. ptr = strstr(ptr,"data-cfemail=\"");
  856. ptr += 14;
  857. while(*ptr != '"') encrypted[j++] = *ptr++;
  858. encrypted[j] = 0;
  859. j = decryptMail(body+i,encrypted);
  860. i += j;
  861. ptr = strstr(ptr,"</script>");
  862. ptr += 9;
  863. }
  864. else if(!strncmp(ptr,"<a ",3)) {
  865. char *tmp = strchr(ptr,'>');
  866. char *href = (char *)memmem_priv(ptr,tmp-ptr,"href=\"",6);
  867. char *link = tmp+1;
  868. if(href && !strncmp(link,"&gt;&gt;",8) && memmem_priv(href,link-href,"test/read.cgi/",14)) {
  869. while(ptr < link) {
  870. if(!strncmp(ptr," class=\"",8)) {
  871. ptr += 8;
  872. while(*ptr != '"' && *ptr != '>') ptr++;
  873. if(*ptr == '"') ptr++;
  874. }
  875. else body[i++] = *ptr++;
  876. }
  877. }
  878. else {
  879. ptr = strstr(link,"</a>");
  880. memcpy(body+i,link,ptr-link);
  881. i += ptr-link;
  882. ptr += 4;
  883. }
  884. }
  885. else if(!strncmp(ptr,"<img src=\"",10)) {
  886. ptr += 10;
  887. char *img = ptr;
  888. ptr = strstr(img,"\">");
  889. memcpy(body+i,img,ptr-img);
  890. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  891. int length = ptr-img;
  892. while(*img != '/') {
  893. img++;
  894. length--;
  895. }
  896. memcpy(body+i,"sssp:",5);
  897. memcpy(body+i+5,img,length);
  898. i += length + 5;
  899. }
  900. else i += ptr-img;
  901. ptr += 2;
  902. }
  903. else if(!strncmp(ptr,"<br>",4)) {
  904. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  905. memcpy(body+i," <br>",5);
  906. i += 5;
  907. }
  908. else {
  909. memcpy(body+i,"<br>",4);
  910. i += 4;
  911. }
  912. ptr += 4;
  913. }
  914. else body[i++] = *ptr++;
  915. }
  916. else body[i++] = *ptr++;
  917. }
  918. resData.append((const char *)body, i);
  919. resData.append("<>");
  920. if(res == 1) resData.append(title);
  921. resData.append("\n");
  922. if(useCache && res == startResNum) {
  923. PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
  924. bool hit = false;
  925. if(info) {
  926. log_printf(5,"cache hit");
  927. if(info->cachedData.size() == resData.size()) {
  928. log_printf(5,"... size match");
  929. if(info->cachedData == resData) {
  930. log_printf(5,"... content match");
  931. hit = true;
  932. cachedSize = info->cachedSize - resData.size();
  933. }
  934. }
  935. log_printf(5,"\n");
  936. }
  937. if(!hit) {
  938. free(buffer);
  939. return "";
  940. }
  941. }
  942. txt += resData;
  943. res++;
  944. while(*ptr == '\n' || *ptr == '\r') ptr++;
  945. if (isNewHTML) snprintf(signature, 64, "<%s id=\"", signatureTag);
  946. else snprintf(signature,64,"%s class=\"post\" id=\"",signatureTag);
  947. while (1) {
  948. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  949. if (!isNewHTML || !ptr) break;
  950. char *tmp = ptr;
  951. while (*tmp != '>') tmp++;
  952. tmp = (char *)memmem_priv(ptr, tmp-ptr, "data-date", 9);
  953. if (tmp) break;
  954. ptr++;
  955. }
  956. if(ptr) {
  957. int next = atoi(ptr+strlen(signature));
  958. if(next >= res) {
  959. while(next > res) {
  960. txt += "broken<><>broken<> broken <>\n";
  961. res++;
  962. }
  963. }
  964. else ptr = NULL;
  965. }
  966. if(!ptr) {
  967. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  968. info->lastResNum = res-1;
  969. info->cachedSize = txt.size()+cachedSize;
  970. info->cachedData = resData;
  971. _threadCache->set(_threadKey, info);
  972. log_printf(5,"cached thread %s (%ld bytes)\n",_threadKey.c_str(),resData.size());
  973. if(lastModified) {
  974. *lastModified = 0;
  975. char formattedDate[256];
  976. char *ptr;
  977. ptr = date;
  978. int year = strtol(ptr,&ptr,10);
  979. if(*ptr != '/') break;
  980. ptr++;
  981. int month = strtol(ptr,&ptr,10);
  982. if(*ptr != '/') break;
  983. ptr++;
  984. int day = strtol(ptr,&ptr,10);
  985. if(!*ptr) break;
  986. while(*ptr != ' ' && *ptr != 0) ptr++;
  987. if(!*ptr) break;
  988. ptr++;
  989. int hour = strtol(ptr,&ptr,10);
  990. if(*ptr != ':') break;
  991. ptr++;
  992. int minutes = strtol(ptr,&ptr,10);
  993. if(*ptr != ':') break;
  994. ptr++;
  995. int seconds = strtol(ptr,&ptr,10);
  996. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  997. if(year < 100) year += 2000;
  998. #if LIBCURL_VERSION_NUM >= 0x070c02 /* curl 7.12.2 or later */
  999. snprintf(formattedDate, 256, "%d%02d%02d %02d:%02d:%02d +0900", year, month, day, hour, minutes, seconds);
  1000. *lastModified = curl_getdate(formattedDate, NULL);
  1001. #else
  1002. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  1003. struct tm time = {0};
  1004. strptime(formattedDate,threadTimestampFmt,&time);
  1005. *lastModified = mktime(&time);
  1006. #endif
  1007. }
  1008. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  1009. break;
  1010. }
  1011. }
  1012. free(buffer);
  1013. return txt;
  1014. }
  1015. std::string BBS2chProxyHTML2DatTalk::json2dat(JSON_Value *json, int startFrom, time_t *lastModifiedOut, bool useCache)
  1016. {
  1017. std::string out;
  1018. if (!json || json_type(json) != JSONObject) {
  1019. return "";
  1020. }
  1021. JSON_Object *root = json_object(json);
  1022. const char *title = json_object_dotget_string(root, "data.title");
  1023. const char *quoteSource = json_object_dotget_string(root, "data.quote_source");
  1024. JSON_Array *comments = json_object_dotget_array(root, "data.comments");
  1025. if (!title || !comments) {
  1026. return "";
  1027. }
  1028. if (startFrom < 1) startFrom = 1;
  1029. int prevNumber = startFrom - 1;
  1030. time_t lastModified = 0;
  1031. size_t cachedSize = 0;
  1032. std::string lastLine;
  1033. for (size_t i=0, length=json_array_get_count(comments); i<length; i++) {
  1034. std::stringstream line;
  1035. JSON_Object *comment = json_array_get_object(comments, i);
  1036. if (!comment) continue;
  1037. int number = json_object_get_number(comment, "number");
  1038. if (number < startFrom) continue;
  1039. const char *name = json_object_dotget_string(comment, "writer.name");
  1040. const char *trip = json_object_dotget_string(comment, "writer.trip");
  1041. const char *slip = json_object_dotget_string(comment, "writer.slip");
  1042. const char *id = json_object_dotget_string(comment, "writer.id");
  1043. time_t timestamp = json_object_get_number(comment, "timestamp");
  1044. const char *body = json_object_get_string(comment, "body");
  1045. if (timestamp > lastModified) lastModified = timestamp;
  1046. for (int j=prevNumber+1; j<number; j++) {
  1047. out += "broken<><>broken<> broken <>\n";
  1048. }
  1049. if (name) {
  1050. std::string tmp(name);
  1051. escapeForHTML(tmp);
  1052. line << tmp;
  1053. if (trip) line << "</b>◆" << trip << "<b>";
  1054. if (slip) line << " </b>(" << slip << ")<b>";
  1055. }
  1056. else line << "削除";
  1057. line << "<><>"; //mail cannot be obtained from json!
  1058. if (timestamp) {
  1059. char dateStr[256] = "";
  1060. struct tm timestamp_tm = {0};
  1061. timestamp += 32400;
  1062. gmtime_r(&timestamp, &timestamp_tm);
  1063. strftime(dateStr, 256, "%Y/%m/%d(", &timestamp_tm);
  1064. line << dateStr << wdays[timestamp_tm.tm_wday] << ") ";
  1065. strftime(dateStr, 256, "%H:%M:%S", &timestamp_tm);
  1066. line << dateStr;
  1067. if (id) {
  1068. line << " ID:" << id;
  1069. }
  1070. }
  1071. else line << "削除";
  1072. line << "<>";
  1073. if (body) {
  1074. std::string tmp(body);
  1075. escapeForHTML(tmp);
  1076. replaceAll(tmp, "\n", " <br> ");
  1077. line << " " << tmp;
  1078. if (number == 1 && quoteSource) {
  1079. line << " <br> <br> 出典 " << quoteSource;
  1080. }
  1081. line << " ";
  1082. }
  1083. else line << "削除";
  1084. line << "<>";
  1085. if (number == 1) {
  1086. std::string tmp(title);
  1087. escapeForHTML(tmp);
  1088. line << tmp;
  1089. }
  1090. line << "\n";
  1091. prevNumber = number;
  1092. char *lineSJIS = convertUTF8ToShiftJISWithNCR(line.str().c_str(), line.str().size());
  1093. if (lineSJIS) {
  1094. lastLine = lineSJIS;
  1095. out += lastLine;
  1096. free(lineSJIS);
  1097. } else {
  1098. lastLine = "broken<><>broken<> broken <>\n";
  1099. out += lastLine;
  1100. }
  1101. if (useCache && startFrom == number) {
  1102. PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
  1103. bool hit = false;
  1104. if (info) {
  1105. log_printf(5, "cache hit");
  1106. if (info->cachedData.size() == lastLine.size()) {
  1107. log_printf(5, "... size match");
  1108. if (info->cachedData == lastLine) {
  1109. log_printf(5, "... content match");
  1110. hit = true;
  1111. cachedSize = info->cachedSize - lastLine.size();
  1112. }
  1113. }
  1114. log_printf(5, "\n");
  1115. }
  1116. if (!hit) {
  1117. return "";
  1118. }
  1119. }
  1120. }
  1121. if (!lastLine.empty()) {
  1122. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1123. info->lastResNum = prevNumber;
  1124. info->cachedSize = out.size() + cachedSize;
  1125. info->cachedData = lastLine;
  1126. _threadCache->set(_threadKey, info);
  1127. log_printf(5, "cached thread %s (%ld bytes)\n", _threadKey.c_str(), lastLine.size());
  1128. }
  1129. if (lastModifiedOut) *lastModifiedOut = lastModified;
  1130. return out;
  1131. }
  1132. std::string BBS2chProxyHTML2Dat5chItest::json2dat(JSON_Value *json, int startFrom, time_t *lastModifiedOut, bool useCache)
  1133. {
  1134. std::string out;
  1135. if (!json || json_type(json) != JSONObject) {
  1136. return "";
  1137. }
  1138. JSON_Object *root = json_object(json);
  1139. JSON_Array *threadMeta = json_object_get_array(root, "thread");
  1140. time_t lastModified = json_array_get_number(threadMeta, 0);
  1141. const char *boardAndKey = json_array_get_string(threadMeta, 3);
  1142. const char *title = json_array_get_string(threadMeta, 5);
  1143. JSON_Array *comments = json_object_get_array(root, "comments");
  1144. if (!title || !*title || !comments) {
  1145. return "";
  1146. }
  1147. if (startFrom < 1) startFrom = 1;
  1148. int prevNumber = startFrom - 1;
  1149. size_t cachedSize = 0;
  1150. std::string lastLine;
  1151. for (size_t i=0, length=json_array_get_count(comments); i<length; i++) {
  1152. std::stringstream line;
  1153. JSON_Array *comment = json_array_get_array(comments, i);
  1154. if (!comment) continue;
  1155. int number = json_array_get_number(comment, 0);
  1156. if (number < startFrom) continue;
  1157. const char *name = json_array_get_string(comment, 1);
  1158. const char *mail = json_array_get_string(comment, 2);
  1159. const char *date = json_array_get_string(comment, 3);
  1160. const char *id = json_array_get_string(comment, 4);
  1161. const char *be = json_array_get_string(comment, 5);
  1162. const char *body = json_array_get_string(comment, 6);
  1163. for (int j=prevNumber+1; j<number; j++) {
  1164. out += "broken<><>broken<> broken <>\n";
  1165. }
  1166. if (name) line << name;
  1167. else line << "削除";
  1168. line << "<>";
  1169. if (mail) line << mail;
  1170. else line << "削除";
  1171. line << "<>";
  1172. if (date) {
  1173. line << date;
  1174. if (id && *id) {
  1175. line << " ID:" << id;
  1176. }
  1177. if (be && *be) {
  1178. line << " BE:" << be;
  1179. }
  1180. }
  1181. else line << "削除";
  1182. line << "<>";
  1183. if (body) {
  1184. const char *ptr = strstr(body, "&gt;&gt;");
  1185. const char *start = body;
  1186. while (ptr) {
  1187. const char *tmp = ptr;
  1188. unsigned int num = strtoul(ptr+8, (char **)&ptr, 10);
  1189. if (num > 0) {
  1190. if (tmp != start) line << std::string(start, tmp-start);
  1191. line << "<a href=\"../test/read.cgi/" << boardAndKey << "/" << num << "\" rel=\"noopener noreferrer\" target=\"_blank\">";
  1192. line << std::string(tmp, ptr-tmp);
  1193. line << "</a>";
  1194. }
  1195. else line << std::string(start, ptr-start);
  1196. start = ptr;
  1197. ptr = strstr(start, "&gt;&gt;");
  1198. }
  1199. line << start;
  1200. }
  1201. else line << "削除";
  1202. line << "<>";
  1203. if (number == 1) {
  1204. line << title;
  1205. }
  1206. line << "\n";
  1207. prevNumber = number;
  1208. char *lineSJIS = convertUTF8ToShiftJISWithNCR(line.str().c_str(), line.str().size());
  1209. if (lineSJIS) {
  1210. lastLine = lineSJIS;
  1211. out += lastLine;
  1212. free(lineSJIS);
  1213. } else {
  1214. lastLine = "broken<><>broken<> broken <>\n";
  1215. out += lastLine;
  1216. }
  1217. if (useCache && startFrom == number) {
  1218. PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
  1219. bool hit = false;
  1220. if (info) {
  1221. log_printf(5, "cache hit");
  1222. if (info->cachedData.size() == lastLine.size()) {
  1223. log_printf(5, "... size match");
  1224. if (info->cachedData == lastLine) {
  1225. log_printf(5, "... content match");
  1226. hit = true;
  1227. cachedSize = info->cachedSize - lastLine.size();
  1228. }
  1229. }
  1230. log_printf(5, "\n");
  1231. }
  1232. if (!hit) {
  1233. return "";
  1234. }
  1235. }
  1236. }
  1237. if (!lastLine.empty()) {
  1238. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1239. info->lastResNum = prevNumber;
  1240. info->cachedSize = out.size() + cachedSize;
  1241. info->cachedData = lastLine;
  1242. _threadCache->set(_threadKey, info);
  1243. log_printf(5, "cached thread %s (%ld bytes)\n", _threadKey.c_str(), lastLine.size());
  1244. }
  1245. if (lastModifiedOut) *lastModifiedOut = lastModified;
  1246. return out;
  1247. }