BBS2chProxyHTML2Dat.cpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283
  1. #include <sstream>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. #include <unistd.h>
  5. #include "BBS2chProxyHTML2Dat.h"
  6. #include "stringEncodingConverter.h"
  7. #include "utils.h"
  // Global User-Agent override defined in another translation unit; when non-NULL it
  // takes precedence over the per-request User-Agent in getHtmlFromURL().
  8. extern char *user_agent;
  // printf-style logger defined elsewhere; the first argument is a numeric level.
  9. extern void log_printf(int level, const char *format ...);
  10. #ifdef _WIN32
  // Windows has no gmtime_r; map it onto gmtime_s, whose argument order is swapped.
  11. #define gmtime_r(a, b) gmtime_s(b, a)
  12. #endif
  // strptime() format used by the fallback timestamp parser when curl_getdate() is unavailable.
  13. static const char threadTimestampFmt[] = "%Y/%m/%d %H:%M:%S %Z";
  // Japanese single-character weekday names, Sunday first.
  // NOTE(review): presumably indexed by tm_wday; the user is outside this view -- confirm.
  14. static const char *wdays[7] = {
  15. "日",
  16. "月",
  17. "火",
  18. "水",
  19. "木",
  20. "金",
  21. "土"
  22. };
  23. static int decryptMail(unsigned char *decrypted, char *encrypted)
  24. {
  25. char current[5]="0x";
  26. unsigned char *ptr = decrypted;
  27. current[2] = encrypted[0];
  28. current[3] = encrypted[1];
  29. unsigned int r = strtol(current,NULL,16);
  30. int len = strlen(encrypted);
  31. int n = 2;
  32. for(;n<len;n+=2) {
  33. current[2] = encrypted[n];
  34. current[3] = encrypted[n+1];
  35. unsigned int i = strtol(current,NULL,16);
  36. *ptr++ = i^r;
  37. }
  38. *ptr = 0;
  39. //fprintf(stderr,"%s->%s\n",encrypted,decrypted);
  40. return ptr - decrypted;
  41. }
  42. static void replaceAll(std::string &input, const std::string &oldValue, const std::string &newValue)
  43. {
  44. if (!oldValue.empty()) {
  45. size_t pos = 0;
  46. while ((pos = input.find(oldValue, pos)) != std::string::npos) {
  47. input.replace(pos, oldValue.size(), newValue);
  48. pos += newValue.size();
  49. }
  50. }
  51. }
  52. static void escapeForHTML(std::string &input)
  53. {
  54. replaceAll(input, "&", "&amp;");
  55. replaceAll(input, "<", "&lt;");
  56. replaceAll(input, ">", "&gt;");
  57. replaceAll(input, "\"", "&quot;");
  58. replaceAll(input, "'", "&#39;");
  59. }
  60. BBS2chProxyHTML2Dat5ch::BBS2chProxyHTML2Dat5ch(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, bool useHttps, CURL *curl)
  61. : IBBS2chProxyHTML2Dat(cache, identifier, curl)
  62. {
  63. _url = useHttps ? "https://" : "http://";
  64. _url += identifier.host;
  65. _url += "/test/read.cgi/";
  66. _url += identifier.board;
  67. _url += '/';
  68. _url += identifier.key;
  69. _url += '/';
  70. }
  71. BBS2chProxyHTML2DatTalk::BBS2chProxyHTML2DatTalk(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, CURL *curl)
  72. : IBBS2chProxyHTML2Dat(cache, identifier, curl), _cachedJson(NULL)
  73. {
  74. _url = "https://talk.jp/api/boards/";
  75. _url += identifier.board;
  76. _url += "/threads/";
  77. _url += identifier.key;
  78. }
  79. BBS2chProxyHTML2DatTalkHTML::BBS2chProxyHTML2DatTalkHTML(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, CURL *curl)
  80. : BBS2chProxyHTML2DatTalk(cache, identifier, curl)
  81. {
  82. _url = "https://talk.jp/boards/";
  83. _url += identifier.board;
  84. _url += '/';
  85. _url += identifier.key;
  86. _url += '/';
  87. }
  88. BBS2chProxyHTML2Dat5chItest::BBS2chProxyHTML2Dat5chItest(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, CURL *curl)
  89. : BBS2chProxyHTML2DatTalk(cache, identifier, curl)
  90. {
  91. _url = "https://itest.5ch.net/public/newapi/client.php?subdomain=";
  92. _url += identifier.hostPrefix;
  93. _url += "&board=";
  94. _url += identifier.board;
  95. _url += "&dat=";
  96. _url += identifier.key;
  97. }
  98. std::vector<char> IBBS2chProxyHTML2Dat::getHtmlFromURL(const std::string &url, long *outStatusCode)
  99. {
  100. CURLcode res;
  101. long statusCode = 0;
  102. std::vector<char> html;
  103. configureCurlHandle(_curl);
  104. curl_easy_setopt(_curl, CURLOPT_URL, url.c_str());
  105. curl_easy_setopt(_curl, CURLOPT_ENCODING, "");
  106. curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, write_callback_download);
  107. curl_easy_setopt(_curl, CURLOPT_WRITEDATA, &html);
  108. curl_easy_setopt(_curl, CURLOPT_FOLLOWLOCATION, 1L);
  109. if (user_agent) {
  110. curl_easy_setopt(_curl, CURLOPT_USERAGENT, user_agent);
  111. }
  112. else if (!_userAgent.empty()) {
  113. curl_easy_setopt(_curl, CURLOPT_USERAGENT, _userAgent.c_str());
  114. }
  115. res = curl_easy_perform(_curl);
  116. if (res == CURLE_OK) {
  117. curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &statusCode);
  118. if (statusCode != 200) html.clear();
  119. } else {
  120. log_printf(0, "curl error: %s (%s)\n", curl_easy_strerror(res), url.c_str());
  121. }
  122. curl_easy_reset(_curl);
  123. if (outStatusCode) *outStatusCode = statusCode;
  124. return html;
  125. }
  126. void IBBS2chProxyHTML2Dat::setRequestHeaders(BBS2chProxyHttpHeaders &headers)
  127. {
  128. if (headers.has("User-Agent")) {
  129. _userAgent = headers.get("User-Agent");
  130. }
  131. }
  132. const std::string& IBBS2chProxyHTML2Dat::getKey()
  133. {
  134. return _threadKey;
  135. }
  136. std::string BBS2chProxyHTML2Dat5ch::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
  137. {
  138. std::string tmpURL(_url);
  139. if (startFrom > 1) {
  140. std::ostringstream ss;
  141. ss << startFrom << "-n";
  142. tmpURL += ss.str();
  143. } else {
  144. tmpURL += "1-";
  145. }
  146. curl_easy_setopt(_curl, CURLOPT_COOKIE, "5chClassic=on");
  147. std::vector<char> html = getHtmlFromURL(tmpURL, outStatusCode);
  148. return html2dat(html, startFrom, lastModifiedOut, useCache);
  149. }
  150. std::string BBS2chProxyHTML2DatTalk::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
  151. {
  152. if (!_cachedJson) {
  153. std::vector<char> json = getHtmlFromURL(_url, outStatusCode);
  154. if (json.empty()) return "";
  155. json.push_back(0);
  156. _cachedJson = json_parse_string(&json.front());
  157. }
  158. if (!_cachedJson) return "";
  159. return json2dat(_cachedJson, startFrom, lastModifiedOut, useCache);
  160. }
  161. std::string BBS2chProxyHTML2DatTalkHTML::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
  162. {
  163. if (!_cachedJson) {
  164. std::vector<char> html = getHtmlFromURL(_url, outStatusCode);
  165. if (html.empty()) return "";
  166. html.push_back(0);
  167. const char *ptr = strstr(&html.front(), "id=\"__NEXT_DATA__\"");
  168. if (ptr) {
  169. ptr += strlen("id=\"__NEXT_DATA__\"");
  170. while (*ptr != '>' && *ptr != 0) ptr++;
  171. if (*ptr) {
  172. const char *end = strstr(++ptr, "</script>");
  173. if (end) {
  174. std::string jsonStr(ptr, end-ptr);
  175. _cachedJson = json_parse_string(jsonStr.c_str());
  176. }
  177. }
  178. }
  179. }
  180. if (!_cachedJson) return "";
  181. JSON_Value *threadData = json_object_dotget_value(json_object(_cachedJson), "props.pageProps.threadData");
  182. return json2dat(threadData, startFrom, lastModifiedOut, useCache);
  183. }
  184. std::string BBS2chProxyHTML2Dat5chItest::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
  185. {
  186. if (!_cachedJson) {
  187. std::vector<char> json = getHtmlFromURL(_url, outStatusCode);
  188. if (json.empty()) return "";
  189. json.push_back(0);
  190. _cachedJson = json_parse_string(&json.front());
  191. }
  192. if (!_cachedJson) return "";
  193. return json2dat(_cachedJson, startFrom, lastModifiedOut, useCache);
  194. }
  // Converts a read.cgi page in the <dt>/<dd> layout into dat text.
  //
  // Each res becomes one line "name<>mail<>date<>body<>" ("title" is appended to the
  // line of res 1), starting at res number startResNum.
  //
  // html         - page bytes; may be modified in place while obfuscated mail links are rewritten.
  // startResNum  - number of the first res to extract.
  // lastModified - optional; set to 0 and then, if the date of the last res parses, to its timestamp.
  // useCache     - when true, the first extracted res must equal the cached last res of the
  //                thread, otherwise "" is returned.
  // Returns the dat text, or "" when <title> or the first res is not found, allocation
  // fails, or the cache check fails.
  //
  // NOTE(review): strstr()/strchr() are used on the vector's storage, which the visible
  // callers do not NUL-terminate, and most strstr()/strchr() results are dereferenced
  // without a NULL check -- confirm the input is trusted/terminated.
  195. std::string BBS2chProxyHTML2Dat5ch::html2dat_old(std::vector<char> &html, int startResNum, time_t *lastModified, bool useCache)
  196. {
  197. char *ptr = &html.front();
  198. char *end = &html.back();
  199. std::string txt;
  200. int res = startResNum, i=0;
  201. char signature[32];
  202. char title[1024];
  203. int cachedSize = 0;
  // bbspink threads skip the <br> and double-space normalisation applied to the body below.
  204. bool bbspink = strstr(_threadKey.c_str(),"bbspink.com") ? true : false;
  205. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<title>", 7);
  206. if(!ptr) {
  207. return "";
  208. }
  209. ptr += 7;
  // Copy the thread title up to </title>.
  // NOTE(review): no bound check against sizeof(title) or the end of the buffer.
  210. while(1) {
  211. if(*ptr == '<') {
  212. if(!strncasecmp(ptr,"</title>",8)) {
  213. ptr += 8;
  214. break;
  215. }
  216. else title[i++] = *ptr++;
  217. }
  218. else title[i++] = *ptr++;
  219. }
  220. title[i] = 0;
  // Every res starts with "<dt>N "; locate the first requested one.
  221. snprintf(signature,32,"<dt>%d ",res);
  222. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  223. if(!ptr) {
  224. return "";
  225. }
  // One allocation carved into five scratch areas:
  // body (65536) | mail (1024) | name (1024) | date (1024) | encrypted (2048).
  226. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  227. if(!buffer) {
  228. return "";
  229. }
  230. unsigned char *body = buffer;
  231. char *mail = (char *)body + 65536;
  232. char *name = mail + 1024;
  233. char *date = name + 1024;
  234. char *encrypted = date + 1024;
  // One iteration per res.
  235. while(ptr < end) {
  236. //fprintf(stderr,"%s\n",signature);
  237. std::string resData;
  238. i=0;
  239. mail[0] = 0;
  240. ptr = strstr(ptr,signature);
  241. ptr += strlen(signature);
  // Move to the first tag after the res number; its kind decides how the name is wrapped.
  242. while(*ptr != '<') ptr++;
  243. ptr++;
  244. const char *endStr;
  245. if(*ptr == 'a' || *ptr == 'A') {
  246. replay:
  247. // has mail
  248. while(*ptr != '"') ptr++;
  249. ptr++;
  250. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  // Obfuscated mail link: decode it, overwrite the HTML just before the current
  // position with an equivalent <a href="mailto:..., then parse that again.
  251. ptr += 28;
  252. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  253. encrypted[i] = 0;
  254. i = decryptMail((unsigned char *)mail,encrypted);
  255. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  256. ptr -= reconstruct_len;
  257. char *start = ptr;
  258. memcpy(ptr, "<a href=\"mailto:", 16);
  259. ptr += 16;
  260. memcpy(ptr, mail, i);
  261. ptr = start;
  262. i=0;
  263. goto replay;
  264. }
  265. else {
  266. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  267. while(*ptr != '"') mail[i++] = *ptr++;
  268. mail[i] = 0;
  269. }
  270. endStr = "</a>";
  271. }
  272. else if(*ptr == 'b') {
  // Bare <b>name</b> with no wrapper element.
  273. endStr = NULL;
  274. }
  275. else {
  276. endStr = "</font>";
  277. }
  278. if(endStr) {
  279. ptr = strstr(ptr,"<b>");
  280. ptr += 3;
  281. }
  282. else {
  283. ptr = strchr(ptr,'>');
  284. ptr++;
  285. }
  286. i=0;
  // Copy the name up to "</b>" followed by endStr (if any), decoding obfuscated
  // mail-address spans found inside it.
  287. while(1) {
  288. if(*ptr == '<') {
  289. if(!strncasecmp(ptr,"</b>",4) && (!endStr || !strncasecmp(ptr+4,endStr,strlen(endStr)))) {
  290. ptr += 4;
  291. if(endStr) ptr += strlen(endStr);
  292. break;
  293. }
  294. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  295. int j=0;
  296. ptr = strstr(ptr,"data-cfemail=\"");
  297. ptr += 14;
  298. while(*ptr != '"') encrypted[j++] = *ptr++;
  299. encrypted[j] = 0;
  300. j = decryptMail((unsigned char *)name+i,encrypted);
  301. i += j;
  302. ptr = strstr(ptr,"</script>");
  303. ptr += 9;
  304. }
  305. else name[i++] = *ptr++;
  306. }
  307. else name[i++] = *ptr++;
  308. }
  309. resData.append(name, i);
  310. resData.append("<>");
  311. if(mail[0]) resData.append(mail);
  312. resData.append("<>");
  // Skip two characters between the name and the date.
  // NOTE(review): presumably a separator emitted by read.cgi -- confirm.
  313. ptr += 2;
  314. i=0;
  // Copy the date field up to <dd>; a javascript:be(...) link is rewritten to
  // "BE:<id>-<text after '?' up to </a>".
  315. while(1) {
  316. if(*ptr == '<') {
  317. if(!strncasecmp(ptr,"<dd>",4)) {
  318. ptr += 4;
  319. break;
  320. }
  321. else if(!strncmp(ptr,"<a href=\"javascript:be(",23)) {
  322. memcpy(date+i,"BE:",3);
  323. ptr += 23;
  324. i += 3;
  325. while(*ptr != ')') date[i++] = *ptr++;
  326. date[i++] = '-';
  327. ptr = strchr(ptr,'?');
  328. ptr++;
  329. char *tmp = strstr(ptr,"</a>");
  330. memcpy(date+i,ptr,tmp-ptr);
  331. i += tmp-ptr;
  332. ptr = tmp + 4;
  333. }
  334. else date[i++] = *ptr++;
  335. }
  336. else date[i++] = *ptr++;
  337. }
  338. resData.append(date, i);
  339. resData.append("<>");
  340. i=0;
  // Copy the message body until "<br><br>\n", the next <dt>, or </dl>.
  341. while(1) {
  342. if(*ptr == '<') {
  343. if(!strncasecmp(ptr,"<br><br>\n",9)) {
  344. ptr += 9;
  345. break;
  346. }
  347. else if(!strncasecmp(ptr,"<dt>",4) || !strncasecmp(ptr,"</dl>",5)) {
  // Body ran straight into the next res / end of list: drop trailing newlines.
  348. while(i>0 &&body[i-1] == '\n') i--;
  349. break;
  350. }
  351. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  352. int j=0;
  353. ptr = strstr(ptr,"data-cfemail=\"");
  354. ptr += 14;
  355. while(*ptr != '"') encrypted[j++] = *ptr++;
  356. encrypted[j] = 0;
  357. j = decryptMail(body+i,encrypted);
  358. i += j;
  359. ptr = strstr(ptr,"</script>");
  360. ptr += 9;
  361. }
  362. else if(!strncmp(ptr,"<a href=\"http",13)) {
  // Unwrap auto-generated links: keep only the link text.
  363. ptr = strchr(ptr,'>');
  364. ptr++;
  365. char *link = ptr;
  366. ptr = strstr(link,"</a>");
  367. memcpy(body+i,link,ptr-link);
  368. i += ptr-link;
  369. ptr += 4;
  370. }
  371. else if(!strncmp(ptr,"<img src=\"",10)) {
  // Replace an <img> tag by its URL; for the listed image hosts the scheme is
  // replaced by "sssp:" (everything before the first '/' is dropped).
  372. ptr += 10;
  373. char *img = ptr;
  374. ptr = strstr(img,"\">");
  375. memcpy(body+i,img,ptr-img);
  376. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  377. int length = ptr-img;
  378. while(*img != '/') {
  379. img++;
  380. length--;
  381. }
  382. memcpy(body+i,"sssp:",5);
  383. memcpy(body+i+5,img,length);
  384. i += length + 5;
  385. }
  386. else i += ptr-img;
  387. ptr += 2;
  388. }
  389. else if(!bbspink && !strncmp(ptr,"<br>",4)) {
  // When the output already ends in "<br> ", emit " <br>" so that consecutive
  // line breaks stay separated by a space on both sides.
  390. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  391. memcpy(body+i," <br>",5);
  392. i += 5;
  393. }
  394. else {
  395. memcpy(body+i,"<br>",4);
  396. i += 4;
  397. }
  398. ptr += 4;
  399. }
  400. else body[i++] = *ptr++;
  401. }
  402. else if(!bbspink && *ptr == ' ') {
  // Collapse runs of spaces to a single space.
  403. if(*(ptr+1) == ' ') ptr++;
  404. else body[i++] = *ptr++;
  405. }
  406. else body[i++] = *ptr++;
  407. }
  408. resData.append((const char *)body ,i);
  409. resData.append("<>");
  410. if(res == 1) resData.append(title);
  411. resData.append("\n");
  // Cache validation: the first res of this request must be identical to the res
  // stored by the previous request; the cache entry is popped either way.
  412. if(useCache && res == startResNum) {
  413. PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
  414. bool hit = false;
  415. if(info) {
  416. log_printf(5,"cache hit");
  417. if(info->cachedData.size() == resData.size()) {
  418. log_printf(5,"... size match");
  419. if(info->cachedData == resData) {
  420. log_printf(5,"... content match");
  421. hit = true;
  422. cachedSize = info->cachedSize - resData.size();
  423. }
  424. }
  425. log_printf(5,"\n");
  426. }
  427. if(!hit) {
  428. free(buffer);
  429. return "";
  430. }
  431. }
  432. txt += resData;
  433. res++;
  434. while(*ptr == '\n' || *ptr == '\r') ptr++;
  435. snprintf(signature,32,"<dt>%d ",res);
  // No further res on the page: remember the last res for the next request's cache
  // check, derive lastModified from its date, and stop.
  436. if(!memmem_priv(ptr, end-ptr+1, signature, strlen(signature))) {
  437. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  438. info->lastResNum = res-1;
  439. info->cachedSize = txt.size()+cachedSize;
  440. info->cachedData = resData;
  441. _threadCache->set(_threadKey, info);
  442. log_printf(5,"cached thread %s (%ld bytes)\n",_threadKey.c_str(),resData.size());
  443. if(lastModified) {
  444. *lastModified = 0;
  445. char formattedDate[256];
  // This local ptr shadows the outer parse cursor. Each `break` below leaves the
  // per-res loop with *lastModified still 0 when the date text is not in the
  // expected "Y/M/D<anything> H:M:S" shape.
  // NOTE(review): date is not NUL-terminated after the copy loop above -- confirm.
  446. char *ptr;
  447. ptr = date;
  448. int year = strtol(ptr,&ptr,10);
  449. if(*ptr != '/') break;
  450. ptr++;
  451. int month = strtol(ptr,&ptr,10);
  452. if(*ptr != '/') break;
  453. ptr++;
  454. int day = strtol(ptr,&ptr,10);
  455. if(!*ptr) break;
  456. while(*ptr != ' ' && *ptr != 0) ptr++;
  457. if(!*ptr) break;
  458. ptr++;
  459. int hour = strtol(ptr,&ptr,10);
  460. if(*ptr != ':') break;
  461. ptr++;
  462. int minutes = strtol(ptr,&ptr,10);
  463. if(*ptr != ':') break;
  464. ptr++;
  465. int seconds = strtol(ptr,&ptr,10);
  466. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  // Two-digit years are taken to be 20xx.
  467. if(year < 100) year += 2000;
  468. #if LIBCURL_VERSION_NUM >= 0x070c02 /* curl 7.12.2 or later */
  // The timestamp is formatted with a fixed +0900 offset before parsing.
  469. snprintf(formattedDate, 256, "%d%02d%02d %02d:%02d:%02d +0900", year, month, day, hour, minutes, seconds);
  470. *lastModified = curl_getdate(formattedDate, NULL);
  471. #else
  472. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  473. struct tm time = {0};
  474. strptime(formattedDate,threadTimestampFmt,&time);
  475. *lastModified = mktime(&time);
  476. #endif
  477. }
  478. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  479. break;
  480. }
  481. }
  482. free(buffer);
  483. return txt;
  484. }
  485. std::string BBS2chProxyHTML2Dat5ch::html2dat(std::vector<char> &html, int startResNum, time_t *lastModified, bool useCache)
  486. {
  487. char *ptr = &html.front();
  488. char *end = &html.back();
  489. std::string txt;
  490. int res = startResNum, i=0;
  491. char signature[64];
  492. char title[1024];
  493. int cachedSize = 0;
  494. char signatureTag[32];
  495. char closeTag[48];
  496. int closeTagLen;
  497. bool isNewHTML = false;
  498. if (html.empty()) return "";
  499. ptr = (char *)memmem_priv(ptr, end-ptr+1, " id=\"threadtitle\">", 18);
  500. if (ptr) {
  501. isNewHTML = true;
  502. char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, "<article id=\"", 13);
  503. if (!ptr2) {
  504. strcpy(signatureTag, "div");
  505. } else {
  506. strcpy(signatureTag, "article");
  507. }
  508. const char *tmp = ptr;
  509. while (*tmp != '<') tmp--;
  510. memcpy(closeTag+2, tmp+1, ptr-tmp-1);
  511. closeTag[0] = '<';
  512. closeTag[1] = '/';
  513. closeTag[ptr-tmp+1] = '>';
  514. closeTag[ptr-tmp+2] = 0;
  515. ptr += 18;
  516. while (1) {
  517. if (*ptr == '<') {
  518. if (!strncasecmp(ptr, closeTag, strlen(closeTag))) {
  519. ptr += strlen(closeTag);
  520. break;
  521. }
  522. else title[i++] = *ptr++;
  523. }
  524. else if(*ptr == '\n') break;
  525. else title[i++] = *ptr++;
  526. }
  527. title[i] = 0;
  528. snprintf(signature, 32, "<%s id=\"%d\"", signatureTag, res);
  529. }
  530. else {
  531. ptr = &html.front();
  532. ptr = (char *)memmem_priv(ptr, end-ptr+1, "<h1 class=\"title\">", 18);
  533. if(!ptr) {
  534. return html2dat_old(html, startResNum, lastModified, useCache);
  535. }
  536. else {
  537. char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, " class=\"post\"", 13);
  538. if(ptr2) {
  539. char *tmp = ptr2;
  540. *ptr2 = 0;
  541. while(*ptr2 != '<') ptr2--;
  542. strcpy(signatureTag, ptr2);
  543. *tmp = ' ';
  544. }
  545. else {
  546. return "";
  547. }
  548. /*char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, "<dl class=\"post\"", 16);
  549. if(ptr2) {
  550. return html2dat_pink(html, startResNum, lastModified, useCache);
  551. }*/
  552. }
  553. ptr += 18;
  554. while(1) {
  555. if(*ptr == '<') {
  556. if(!strncasecmp(ptr,"</h1>",5)) {
  557. ptr += 5;
  558. break;
  559. }
  560. else title[i++] = *ptr++;
  561. }
  562. else if(*ptr == '\n') break;
  563. else title[i++] = *ptr++;
  564. }
  565. title[i] = 0;
  566. snprintf(signature,32,"%s class=\"post\" id=\"%d\"",signatureTag,res);
  567. }
  568. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  569. if(!ptr) {
  570. return "";
  571. }
  572. unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
  573. if(!buffer) {
  574. return "";
  575. }
  576. unsigned char *body = buffer;
  577. char *mail = (char *)body + 65536;
  578. char *name = mail + 1024;
  579. char *date = name + 1024;
  580. char *encrypted = date + 1024;
  581. while(ptr < end) {
  582. //fprintf(stderr,"%s\n",signature);
  583. std::string resData;
  584. i=0;
  585. mail[0] = 0;
  586. if (isNewHTML) ptr = strstr(ptr," class=\"postusername\"><b>");
  587. else ptr = strstr(ptr," class=\"name\"><b>");
  588. if(ptr) {
  589. char *tmp = ptr;
  590. *ptr = 0;
  591. while(*ptr != '<') ptr--;
  592. snprintf(closeTag, 48, "</%s>", ptr+1);
  593. closeTagLen = strlen(closeTag);
  594. if (isNewHTML) ptr = tmp + 25;
  595. else ptr = tmp + 17;
  596. }
  597. else {
  598. break;
  599. }
  600. char endStr[64];
  601. if(!strncmp(ptr,"<a ", 3)) {
  602. char *tmp = ptr;
  603. while (*tmp != '>') tmp++;
  604. ptr = (char *)memmem_priv(ptr, tmp-ptr, "href=\"", 6);
  605. if (!ptr) {
  606. ptr = tmp;
  607. goto mailEnd;
  608. }
  609. replay:
  610. // has mail
  611. while(*ptr != '"') ptr++;
  612. ptr++;
  613. if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
  614. ptr += 28;
  615. while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
  616. encrypted[i] = 0;
  617. i = decryptMail((unsigned char *)mail,encrypted);
  618. int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
  619. ptr -= reconstruct_len;
  620. char *start = ptr;
  621. memcpy(ptr, "<a href=\"mailto:", 16);
  622. ptr += 16;
  623. memcpy(ptr, mail, i);
  624. ptr = start;
  625. i=0;
  626. goto replay;
  627. }
  628. else {
  629. if(!strncmp(ptr,"mailto:",7)) ptr += 7;
  630. while(1) {
  631. if(*ptr == '<' && !strncmp(ptr,"<a href=\"",9)) {
  632. ptr = strchr(ptr,'>');
  633. ptr++;
  634. char *link = ptr;
  635. ptr = strstr(link,"</a>");
  636. memcpy(mail+i,link,ptr-link);
  637. i += ptr-link;
  638. ptr += 4;
  639. }
  640. else if(*ptr == '"') break;
  641. else mail[i++] = *ptr++;
  642. }
  643. //while(*ptr != '"') mail[i++] = *ptr++;
  644. mail[i] = 0;
  645. }
  646. mailEnd:
  647. snprintf(endStr,64,"</a></b>%s",closeTag);
  648. while(*ptr != '>') ptr++;
  649. ptr++;
  650. }
  651. /* we do not have to handle this special case because read.cgi on bbspink doesn't
  652. emit font tags anymore and it conflicts with text decorations using "melon point" */
  653. /*else if(!strncmp(ptr,"<font",5)) {
  654. snprintf(endStr,64,"</font></b>%s",closeTag);
  655. while(*ptr != '>') ptr++;
  656. ptr++;
  657. }*/
  658. else {
  659. snprintf(endStr,64,"</b>%s",closeTag);
  660. }
  661. i=0;
  662. while(1) {
  663. if(*ptr == '<') {
  664. if(!strncmp(ptr,endStr,strlen(endStr))) {
  665. ptr += strlen(endStr);
  666. break;
  667. }
  668. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
  669. int j=0;
  670. ptr = strstr(ptr,"data-cfemail=\"");
  671. ptr += 14;
  672. while(*ptr != '"') encrypted[j++] = *ptr++;
  673. encrypted[j] = 0;
  674. j = decryptMail((unsigned char *)name+i,encrypted);
  675. i += j;
  676. ptr = strstr(ptr,"</script>");
  677. ptr += 9;
  678. }
  679. else if(!strncmp(ptr,"<a href=\"",9)) {
  680. ptr = strchr(ptr,'>');
  681. ptr++;
  682. char *link = ptr;
  683. ptr = strstr(link,"</a>");
  684. memcpy(name+i,link,ptr-link);
  685. i += ptr-link;
  686. ptr += 4;
  687. }
  688. else name[i++] = *ptr++;
  689. }
  690. else name[i++] = *ptr++;
  691. }
  692. resData.append(name, i);
  693. resData.append("<>");
  694. if(mail[0]) resData.append(mail);
  695. resData.append("<>");
  696. ptr = strstr(ptr," class=\"date\">");
  697. if(ptr) {
  698. char *tmp = ptr;
  699. *ptr = 0;
  700. while(*ptr != '<') ptr--;
  701. snprintf(closeTag, 48, "</%s>", ptr+1);
  702. closeTagLen = strlen(closeTag);
  703. ptr = tmp + 14;
  704. }
  705. else {
  706. break;
  707. }
  708. i=0;
  709. while(1) {
  710. if(*ptr == '<') {
  711. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  712. ptr += closeTagLen;
  713. break;
  714. }
  715. else date[i++] = *ptr++;
  716. }
  717. else date[i++] = *ptr++;
  718. }
  719. if(!strncmp(ptr,"<div class=\"uid",15) || !strncmp(ptr,"<span class=\"uid",16)) {
  720. char *tmp = ptr+1;
  721. while(*ptr != ' ') ptr++;
  722. *ptr = 0;
  723. snprintf(closeTag, 48, "</%s>", tmp);
  724. closeTagLen = strlen(closeTag);
  725. ptr += 11;
  726. while(*ptr != '>') ptr++;
  727. ptr++;
  728. date[i++] = ' ';
  729. while(1) {
  730. if(*ptr == '<') {
  731. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  732. ptr += closeTagLen;
  733. break;
  734. }
  735. else date[i++] = *ptr++;
  736. }
  737. else date[i++] = *ptr++;
  738. }
  739. }
  740. if (isNewHTML && !strncmp(ptr, "</span>", 7)) ptr += 7;
  741. if(!strncmp(ptr,"<div class=\"be",14) || !strncmp(ptr,"<span class=\"be",15)) {
  742. ptr += 14;
  743. while(*ptr != '>') ptr++;
  744. ptr++;
  745. if(!strncmp(ptr,"<a href=\"",9)) {
  746. ptr += 9;
  747. while(*ptr != '/' && *ptr != '"') ptr++;
  748. if(*ptr == '/' && (!strncmp(ptr,"//be.2ch.net/user/",18) || !strncmp(ptr,"//be.5ch.net/user/",18))) {
  749. memcpy(date+i," BE:",4);
  750. i += 4;
  751. ptr += 18;
  752. while(*ptr != '"') date[i++] = *ptr++;
  753. date[i++] = '-';
  754. ptr = strchr(ptr,'?');
  755. ptr++;
  756. char *tmp = strstr(ptr,"</a>");
  757. memcpy(date+i,ptr,tmp-ptr);
  758. i += tmp-ptr;
  759. ptr = tmp + 4;
  760. }
  761. }
  762. }
  763. resData.append(date, i);
  764. resData.append("<>");
  765. if (isNewHTML) {
  766. ptr = strstr(ptr," class=\"post-content\">");
  767. if (!ptr) {
  768. break;
  769. }
  770. else {
  771. char *tmp = ptr;
  772. char postContentTag[32];
  773. while (*tmp != '<') tmp--;
  774. memcpy(postContentTag, tmp+1, ptr-tmp-1);
  775. postContentTag[ptr-tmp-1] = 0;
  776. ptr += 22;
  777. if (!strncasecmp(ptr, "<span class=\"AA\">", 17)) {
  778. snprintf(closeTag, 48, "</span></%s>", postContentTag);
  779. closeTagLen = strlen(closeTag);
  780. ptr += 17;
  781. }
  782. else {
  783. snprintf(closeTag, 48, "</%s>", postContentTag);
  784. closeTagLen = strlen(closeTag);
  785. }
  786. }
  787. }
  788. else if(!strcmp(signatureTag,"<div")) {
  789. ptr = strstr(ptr,"<div class=\"message\">");
  790. if(!ptr) {
  791. break;
  792. }
  793. else {
  794. ptr += 21;
  795. if(!strncasecmp(ptr,"<span class=\"escaped\">",22)) {
  796. if(!strncasecmp(ptr+22,"<span class=\"AA\">",17)) {
  797. strcpy(closeTag,"</span></span></div>");
  798. closeTagLen = 20;
  799. ptr += 22+17;
  800. }
  801. else {
  802. strcpy(closeTag,"</span></div>");
  803. closeTagLen = 13;
  804. ptr += 22;
  805. }
  806. }
  807. else {
  808. strcpy(closeTag,"</div>");
  809. closeTagLen = 6;
  810. }
  811. }
  812. }
  813. else {
  814. ptr = strstr(ptr,"<dd class=\"thread_in\">");
  815. if(!ptr) {
  816. break;
  817. }
  818. strcpy(closeTag,"</dd>");
  819. closeTagLen = 5;
  820. ptr += 22;
  821. }
  822. i=0;
  823. while(1) {
  824. if(*ptr == '<') {
  825. if(!strncasecmp(ptr,closeTag,closeTagLen)) {
  826. ptr += closeTagLen;
  827. break;
  828. }
  829. else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
  830. int j=0;
  831. ptr = strstr(ptr,"data-cfemail=\"");
  832. ptr += 14;
  833. while(*ptr != '"') encrypted[j++] = *ptr++;
  834. encrypted[j] = 0;
  835. j = decryptMail(body+i,encrypted);
  836. i += j;
  837. ptr = strstr(ptr,"</script>");
  838. ptr += 9;
  839. }
  840. else if(!strncmp(ptr,"<a ",3)) {
  841. char *tmp = strchr(ptr,'>');
  842. char *href = (char *)memmem_priv(ptr,tmp-ptr,"href=\"",6);
  843. char *link = tmp+1;
  844. if(href && !strncmp(link,"&gt;&gt;",8) && memmem_priv(href,link-href,"test/read.cgi/",14)) {
  845. while(ptr < link) {
  846. if(!strncmp(ptr," class=\"",8)) {
  847. ptr += 8;
  848. while(*ptr != '"' && *ptr != '>') ptr++;
  849. if(*ptr == '"') ptr++;
  850. }
  851. else body[i++] = *ptr++;
  852. }
  853. }
  854. else {
  855. ptr = strstr(link,"</a>");
  856. memcpy(body+i,link,ptr-link);
  857. i += ptr-link;
  858. ptr += 4;
  859. }
  860. }
  861. else if(!strncmp(ptr,"<img src=\"",10)) {
  862. ptr += 10;
  863. char *img = ptr;
  864. ptr = strstr(img,"\">");
  865. memcpy(body+i,img,ptr-img);
  866. if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
  867. int length = ptr-img;
  868. while(*img != '/') {
  869. img++;
  870. length--;
  871. }
  872. memcpy(body+i,"sssp:",5);
  873. memcpy(body+i+5,img,length);
  874. i += length + 5;
  875. }
  876. else i += ptr-img;
  877. ptr += 2;
  878. }
  879. else if(!strncmp(ptr,"<br>",4)) {
  880. if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
  881. memcpy(body+i," <br>",5);
  882. i += 5;
  883. }
  884. else {
  885. memcpy(body+i,"<br>",4);
  886. i += 4;
  887. }
  888. ptr += 4;
  889. }
  890. else body[i++] = *ptr++;
  891. }
  892. else body[i++] = *ptr++;
  893. }
  894. resData.append((const char *)body, i);
  895. resData.append("<>");
  896. if(res == 1) resData.append(title);
  897. resData.append("\n");
  898. if(useCache && res == startResNum) {
  899. PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
  900. bool hit = false;
  901. if(info) {
  902. log_printf(5,"cache hit");
  903. if(info->cachedData.size() == resData.size()) {
  904. log_printf(5,"... size match");
  905. if(info->cachedData == resData) {
  906. log_printf(5,"... content match");
  907. hit = true;
  908. cachedSize = info->cachedSize - resData.size();
  909. }
  910. }
  911. log_printf(5,"\n");
  912. }
  913. if(!hit) {
  914. free(buffer);
  915. return "";
  916. }
  917. }
  918. txt += resData;
  919. res++;
  920. while(*ptr == '\n' || *ptr == '\r') ptr++;
  921. if (isNewHTML) snprintf(signature, 64, "<%s id=\"", signatureTag);
  922. else snprintf(signature,64,"%s class=\"post\" id=\"",signatureTag);
  923. while (1) {
  924. ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
  925. if (!isNewHTML || !ptr) break;
  926. char *tmp = ptr;
  927. while (*tmp != '>') tmp++;
  928. tmp = (char *)memmem_priv(ptr, tmp-ptr, "data-date", 9);
  929. if (tmp) break;
  930. ptr++;
  931. }
  932. if(ptr) {
  933. int next = atoi(ptr+strlen(signature));
  934. if(next >= res) {
  935. while(next > res) {
  936. txt += "broken<><>broken<> broken <>\n";
  937. res++;
  938. }
  939. }
  940. else ptr = NULL;
  941. }
  942. if(!ptr) {
  943. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  944. info->lastResNum = res-1;
  945. info->cachedSize = txt.size()+cachedSize;
  946. info->cachedData = resData;
  947. _threadCache->set(_threadKey, info);
  948. log_printf(5,"cached thread %s (%ld bytes)\n",_threadKey.c_str(),resData.size());
  949. if(lastModified) {
  950. *lastModified = 0;
  951. char formattedDate[256];
  952. char *ptr;
  953. ptr = date;
  954. int year = strtol(ptr,&ptr,10);
  955. if(*ptr != '/') break;
  956. ptr++;
  957. int month = strtol(ptr,&ptr,10);
  958. if(*ptr != '/') break;
  959. ptr++;
  960. int day = strtol(ptr,&ptr,10);
  961. if(!*ptr) break;
  962. while(*ptr != ' ' && *ptr != 0) ptr++;
  963. if(!*ptr) break;
  964. ptr++;
  965. int hour = strtol(ptr,&ptr,10);
  966. if(*ptr != ':') break;
  967. ptr++;
  968. int minutes = strtol(ptr,&ptr,10);
  969. if(*ptr != ':') break;
  970. ptr++;
  971. int seconds = strtol(ptr,&ptr,10);
  972. if(!(month>0 && month<13) || !(day>0 && day<32)) break;
  973. if(year < 100) year += 2000;
  974. #if LIBCURL_VERSION_NUM >= 0x070c02 /* curl 7.12.2 or later */
  975. snprintf(formattedDate, 256, "%d%02d%02d %02d:%02d:%02d +0900", year, month, day, hour, minutes, seconds);
  976. *lastModified = curl_getdate(formattedDate, NULL);
  977. #else
  978. snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
  979. struct tm time = {0};
  980. strptime(formattedDate,threadTimestampFmt,&time);
  981. *lastModified = mktime(&time);
  982. #endif
  983. }
  984. //fprintf(stderr,"not found,%ld\n",end-ptr+1);
  985. break;
  986. }
  987. }
  988. free(buffer);
  989. return txt;
  990. }
  991. std::string BBS2chProxyHTML2DatTalk::json2dat(JSON_Value *json, int startFrom, time_t *lastModifiedOut, bool useCache)
  992. {
  993. std::string out;
  994. if (!json || json_type(json) != JSONObject) {
  995. return "";
  996. }
  997. JSON_Object *root = json_object(json);
  998. const char *title = json_object_dotget_string(root, "data.title");
  999. const char *quoteSource = json_object_dotget_string(root, "data.quote_source");
  1000. JSON_Array *comments = json_object_dotget_array(root, "data.comments");
  1001. if (!title || !comments) {
  1002. return "";
  1003. }
  1004. if (startFrom < 1) startFrom = 1;
  1005. int prevNumber = startFrom - 1;
  1006. time_t lastModified = 0;
  1007. size_t cachedSize = 0;
  1008. std::string lastLine;
  1009. for (size_t i=0, length=json_array_get_count(comments); i<length; i++) {
  1010. std::stringstream line;
  1011. JSON_Object *comment = json_array_get_object(comments, i);
  1012. if (!comment) continue;
  1013. int number = json_object_get_number(comment, "number");
  1014. if (number < startFrom) continue;
  1015. const char *name = json_object_dotget_string(comment, "writer.name");
  1016. const char *trip = json_object_dotget_string(comment, "writer.trip");
  1017. const char *slip = json_object_dotget_string(comment, "writer.slip");
  1018. const char *id = json_object_dotget_string(comment, "writer.id");
  1019. time_t timestamp = json_object_get_number(comment, "timestamp");
  1020. const char *body = json_object_get_string(comment, "body");
  1021. if (timestamp > lastModified) lastModified = timestamp;
  1022. for (int j=prevNumber+1; j<number; j++) {
  1023. out += "broken<><>broken<> broken <>\n";
  1024. }
  1025. if (name) {
  1026. std::string tmp(name);
  1027. escapeForHTML(tmp);
  1028. line << tmp;
  1029. if (trip) line << "</b>◆" << trip << "<b>";
  1030. if (slip) line << " </b>(" << slip << ")<b>";
  1031. }
  1032. else line << "削除";
  1033. line << "<><>"; //mail cannot be obtained from json!
  1034. if (timestamp) {
  1035. char dateStr[256] = "";
  1036. struct tm timestamp_tm = {0};
  1037. timestamp += 32400;
  1038. gmtime_r(&timestamp, &timestamp_tm);
  1039. strftime(dateStr, 256, "%Y/%m/%d(", &timestamp_tm);
  1040. line << dateStr << wdays[timestamp_tm.tm_wday] << ") ";
  1041. strftime(dateStr, 256, "%H:%M:%S", &timestamp_tm);
  1042. line << dateStr;
  1043. if (id) {
  1044. line << " ID:" << id;
  1045. }
  1046. }
  1047. else line << "削除";
  1048. line << "<>";
  1049. if (body) {
  1050. std::string tmp(body);
  1051. escapeForHTML(tmp);
  1052. replaceAll(tmp, "\n", " <br> ");
  1053. line << " " << tmp;
  1054. if (number == 1 && quoteSource) {
  1055. line << " <br> <br> 出典 " << quoteSource;
  1056. }
  1057. line << " ";
  1058. }
  1059. else line << "削除";
  1060. line << "<>";
  1061. if (number == 1) {
  1062. std::string tmp(title);
  1063. escapeForHTML(tmp);
  1064. line << tmp;
  1065. }
  1066. line << "\n";
  1067. prevNumber = number;
  1068. char *lineSJIS = convertUTF8ToShiftJISWithNCR(line.str().c_str(), line.str().size());
  1069. if (lineSJIS) {
  1070. lastLine = lineSJIS;
  1071. out += lastLine;
  1072. free(lineSJIS);
  1073. } else {
  1074. lastLine = "broken<><>broken<> broken <>\n";
  1075. out += lastLine;
  1076. }
  1077. if (useCache && startFrom == number) {
  1078. PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
  1079. bool hit = false;
  1080. if (info) {
  1081. log_printf(5, "cache hit");
  1082. if (info->cachedData.size() == lastLine.size()) {
  1083. log_printf(5, "... size match");
  1084. if (info->cachedData == lastLine) {
  1085. log_printf(5, "... content match");
  1086. hit = true;
  1087. cachedSize = info->cachedSize - lastLine.size();
  1088. }
  1089. }
  1090. log_printf(5, "\n");
  1091. }
  1092. if (!hit) {
  1093. return "";
  1094. }
  1095. }
  1096. }
  1097. if (!lastLine.empty()) {
  1098. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1099. info->lastResNum = prevNumber;
  1100. info->cachedSize = out.size() + cachedSize;
  1101. info->cachedData = lastLine;
  1102. _threadCache->set(_threadKey, info);
  1103. log_printf(5, "cached thread %s (%ld bytes)\n", _threadKey.c_str(), lastLine.size());
  1104. }
  1105. if (lastModifiedOut) *lastModifiedOut = lastModified;
  1106. return out;
  1107. }
  1108. std::string BBS2chProxyHTML2Dat5chItest::json2dat(JSON_Value *json, int startFrom, time_t *lastModifiedOut, bool useCache)
  1109. {
  1110. std::string out;
  1111. if (!json || json_type(json) != JSONObject) {
  1112. return "";
  1113. }
  1114. JSON_Object *root = json_object(json);
  1115. JSON_Array *threadMeta = json_object_get_array(root, "thread");
  1116. time_t lastModified = json_array_get_number(threadMeta, 0);
  1117. const char *boardAndKey = json_array_get_string(threadMeta, 3);
  1118. const char *title = json_array_get_string(threadMeta, 5);
  1119. JSON_Array *comments = json_object_get_array(root, "comments");
  1120. if (!title || !*title || !comments) {
  1121. return "";
  1122. }
  1123. if (startFrom < 1) startFrom = 1;
  1124. int prevNumber = startFrom - 1;
  1125. size_t cachedSize = 0;
  1126. std::string lastLine;
  1127. for (size_t i=0, length=json_array_get_count(comments); i<length; i++) {
  1128. std::stringstream line;
  1129. JSON_Array *comment = json_array_get_array(comments, i);
  1130. if (!comment) continue;
  1131. int number = json_array_get_number(comment, 0);
  1132. if (number < startFrom) continue;
  1133. const char *name = json_array_get_string(comment, 1);
  1134. const char *mail = json_array_get_string(comment, 2);
  1135. const char *date = json_array_get_string(comment, 3);
  1136. const char *id = json_array_get_string(comment, 4);
  1137. const char *be = json_array_get_string(comment, 5);
  1138. const char *body = json_array_get_string(comment, 6);
  1139. for (int j=prevNumber+1; j<number; j++) {
  1140. out += "broken<><>broken<> broken <>\n";
  1141. }
  1142. if (name) line << name;
  1143. else line << "削除";
  1144. line << "<>";
  1145. if (mail) line << mail;
  1146. else line << "削除";
  1147. line << "<>";
  1148. if (date) {
  1149. line << date;
  1150. if (id && *id) {
  1151. line << " ID:" << id;
  1152. }
  1153. if (be && *be) {
  1154. line << " BE:" << be;
  1155. }
  1156. }
  1157. else line << "削除";
  1158. line << "<>";
  1159. if (body) {
  1160. const char *ptr = strstr(body, "&gt;&gt;");
  1161. const char *start = body;
  1162. while (ptr) {
  1163. const char *tmp = ptr;
  1164. unsigned int num = strtoul(ptr+8, (char **)&ptr, 10);
  1165. if (num > 0) {
  1166. if (tmp != start) line << std::string(start, tmp-start);
  1167. line << "<a href=\"../test/read.cgi/" << boardAndKey << "/" << num << "\" rel=\"noopener noreferrer\" target=\"_blank\">";
  1168. line << std::string(tmp, ptr-tmp);
  1169. line << "</a>";
  1170. }
  1171. else line << std::string(start, ptr-start);
  1172. start = ptr;
  1173. ptr = strstr(start, "&gt;&gt;");
  1174. }
  1175. line << start;
  1176. }
  1177. else line << "削除";
  1178. line << "<>";
  1179. if (number == 1) {
  1180. line << title;
  1181. }
  1182. line << "\n";
  1183. prevNumber = number;
  1184. char *lineSJIS = convertUTF8ToShiftJISWithNCR(line.str().c_str(), line.str().size());
  1185. if (lineSJIS) {
  1186. lastLine = lineSJIS;
  1187. out += lastLine;
  1188. free(lineSJIS);
  1189. } else {
  1190. lastLine = "broken<><>broken<> broken <>\n";
  1191. out += lastLine;
  1192. }
  1193. if (useCache && startFrom == number) {
  1194. PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
  1195. bool hit = false;
  1196. if (info) {
  1197. log_printf(5, "cache hit");
  1198. if (info->cachedData.size() == lastLine.size()) {
  1199. log_printf(5, "... size match");
  1200. if (info->cachedData == lastLine) {
  1201. log_printf(5, "... content match");
  1202. hit = true;
  1203. cachedSize = info->cachedSize - lastLine.size();
  1204. }
  1205. }
  1206. log_printf(5, "\n");
  1207. }
  1208. if (!hit) {
  1209. return "";
  1210. }
  1211. }
  1212. }
  1213. if (!lastLine.empty()) {
  1214. PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
  1215. info->lastResNum = prevNumber;
  1216. info->cachedSize = out.size() + cachedSize;
  1217. info->cachedData = lastLine;
  1218. _threadCache->set(_threadKey, info);
  1219. log_printf(5, "cached thread %s (%ld bytes)\n", _threadKey.c_str(), lastLine.size());
  1220. }
  1221. if (lastModifiedOut) *lastModifiedOut = lastModified;
  1222. return out;
  1223. }