# =====================================================================
# updatePage.awk: RPC I/O function for rpclib/updatePage.
#
# Copyright (c) 2007-2014 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# =====================================================================

# =====================================================================
# void _userproc(int mode)
#
# mode == _O_REQUEST:
#    Parses the incoming request (RESTful fields, Blogger API 1.0 or
#    MetaWeblog API arguments), hands the sanitized values over with
#    _rcset(), writes the filtered page body to the file named by
#    TNS_CMS_CONTENT and, if the operation is confirmed, writes a
#    pingback batch script to the file named by TNS_PINGBACK_QUEUE.
#    Returns early through _sys("csaExit.fault ...") on fatal input
#    errors.
#
# any other mode (response):
#    Optionally queues pings to special services and sets the template
#    variables for the page title and the page sub-category.
#
# Every name after "mode" in the argument list is a local variable.
# =====================================================================

function _userproc(mode,   value,uvalue,out,out1,j,api,\
                           save,uri,a,i,tmp,tmp1,host,\
                           port,path,re,len,tmp2,tmp3,\
                           tmp4,twtitle,tmp5,mw,geo,my) {

   if (mode == _O_REQUEST) {		# request.

      nodestem()

      # Output files are mandatory!
      if ((out=_rcget("TNS_CMS_CONTENT")) !~ /^\//)
         return(_sys("csaExit.fault 0041 TNS_CMS_CONTENT"))

      if ((out1=_rcget("TNS_PINGBACK_QUEUE")) !~ /^\//)
         return(_sys("csaExit.fault 0041 TNS_PINGBACK_QUEUE"))

      # BloggerAPI 1.0
      if (_request("0_0",1) ~ /^blogger\./) {
         api = "blogger"
         bloggerAuth()
      }

      # MetaWeblog API and derivatives.
      else if (_request("0_0",1) != _NULL) {
         api = "mw"
         bloggerAuth("2_1","3_1","1_1")
      }

      # target group.
      value = substr(_request("1",1),1,ENVIRON["TNS_GROUP_MAXLEN"])

      # BloggerAPI 1.0 and MetaWeblog API
      if (value == _NULL) {
         if (api == "blogger") split(_request("2_1",1),a,"/")
         else split(_request("1_1",1),a,"/")	# MetaWeblog API (default)

         # Handle numeric page specification. The "/=1" forces a[1]
         # to a number, so the test is true only for a non-zero value.
         if (a[2] == _NULL && (a[1]/=1)) {
            tmp = ENVIRON["CSA_ROOT"] "/var/nodes/" \
                  substr(a[1],1,1) "/" substr(a[1],2,1) "/" \
                  substr(a[1],3,1) "/" a[1]

            getline value < tmp; close(tmp)
            split(value,a,"/")
            _rcset("cgi.numeric","on")	# tell the caller.
         }

         if (_LANG[a[1]] != _NULL) _rcset("CSA_LANG",a[1])

         # Target group name is expected to be a unixified string here.
         value = a[2]
      }

      # group must not be null and it may not contain
      # the ``.'' character.

      if (value != _NULL && value !~ /\./) {
         save = unixify(value)
         _rcset("cgi.group",save)
         _rcset("cgi.group.literal",value)

         # Optional content editing GUI for this group.
         # This is not necessarily equal to the one preferred
         # by the user, but depends also on what is actually
         # allowed for this group.

         value = _request("twfilter_" ENVIRON["CSA_ID"] "_" save,1)

         if (value ~ /^(parsewiki|rawhtml|tinymce|nicedit|ckeditor)$/) {
            _rcset("cgi.filter",value)
            _TNS_CMS_FILTER = value	# needed by wikiFilter()
         }
      }

      # RESTful target page within group.
      tmp1 = substr(_request("2",1),1,ENVIRON["TNS_PAGE_MAXLEN"])

      # BloggerAPI 1.0 and MetaWeblog API: if no RESTful page name
      # was supplied, try to get it through the API, in case this
      # was a request for '*.editPost' and the target page name
      # is expected to be an already unixified string.

      if (tmp1 == _NULL && _request("0_0",1) != _NULL) tmp1 = a[3]

      # Actual wiki page content. Large posts are treated like
      # attachments by CSA, not to bloat the underlying shell
      # script environment.

      if (_request("4",1,_O_UPNAME) == "4" && \
          (value=_request("4",1,_O_ATTACH)) != "/dev/null") {

         tmp2 = tmp3 = _NULL
         while ((getline tmp3 < value) > 0) tmp2 = tmp2 tmp3 "\n"
         close(value)
         sub(/\n$/,"",tmp2)		# strip last newline.
      }

      # Note that the next section of code is entered only if this
      # program was called either to create or to update a post, because
      # deleting a post does not take any content for the post body,
      # of course.

      else if ((tmp2=_request("4",1)) == _NULL && \
               _request("0_0",1) != "blogger.deletePost") {

         # BloggerAPI 1.0 and MetaWeblog API.
         # If I was called as '*.editPost' then the target page should
         # have been provided in the call, but if it wasn't then I'll
         # try and build the target page name myself, based on the
         # <title> element of the input data, if any. By doing so, I am
         # actually making no distinction between '*.newPost' and
         # '*.editPost', and this should simplify things quite a bit.
         # If the blogging client requests an old post for editing and
         # then changes the page title before submitting the changes,
         # she will effectively be creating a new post. This is intentional
         # and it is in line with the normal RESTful operation of TW.

         if (api == "mw") {

            # I only handle the "title", "categories", "description"
            # and "link" RSS 2.0 elements. For the "category" element
            # I'll take only the first value of the array. Also, I
            # disregard any "dateCreated" and "pubDate" elements because,
            # beside the fact that they are received in UTC time, the
            # creation date is computed automatically by TW while the
            # "logical" date (vtime) is only allowed to be set through
            # the TW extended title syntax. If I ever wanted to support
            # them, it can be done by converting them from metaWeblog
            # format (i.e. 20090611T08:15:00Z) to GNU date(1) format
            # (that is "20090611 08:15:00+00") and then computing the
            # corresponding local values with
            # "date -d '20090611 08:15:00+00'" .
            # Neither "dateCreated" nor "pubDate" will actually modify
            # the page creation date, which must never be changed, but
            # only the "logical" page date (vtime). If both "pubDate"
            # and "dateCreated" were specified, then the former should
            # take over.

            for (i=1; (tmp5=_request("4_1_" i "_N",1)) != _NULL; i++) {

               if (tmp5 == "title")
                  mw["title"] = _strip(_request("4_1_" i,1),_O_CRUSH)

               else if (tmp5 == "categories") {
                  mw["category"] = _strip(_request("4_1_" i "_1"),_O_CRUSH)

                  # Issue an error message if multiple categories were
                  # specified by the client, as TW does not allow that.
                  # I could simply ignore all the specified categories
                  # but the first one, but I prefer not to lead the
                  # user into believing that her request to post to
                  # multiple categories has succeeded.

                  if (_strip(_request("4_1_" i "_2"),_O_CRUSH) != _NULL)
                     _sys("csaExit.fault 1036")
               }
               else if (tmp5 == "description") {
                  if (_request("4_1_" i "_1",1,_O_UPNAME) == "4_1_" i "_1" \
                      && (value=_request("4_1_" i "_1",1,_O_ATTACH)) \
                      != "/dev/null") {

                     tmp2 = tmp3 = _NULL
                     while ((getline tmp3 < value) > 0)
                        tmp2 = tmp2 tmp3 "\n"
                     close(value)
                     sub(/^\n+/,"",tmp2)	# strip leading newlines.
                     sub(/\n+$/,"",tmp2)	# strip trailing newlines.
                  }
                  else tmp2 = _request("4_1_" i,1)	# get as scalar.
               }

               # Read the link from the struct member itself, the same
               # way "title" is read; "value" holds unrelated data here.
               else if (tmp5 == "link")
                  mw["link"] = _strip(_request("4_1_" i,1),_O_CRUSH)
            }
            tmp2 = _xmldecode(tmp2)		# full post body.
         }

         # Blogger API.
         else {
            if (_request("5_1",1,_O_UPNAME) == "5_1" && \
                (value=_request("5_1",1,_O_ATTACH)) != "/dev/null") {

               tmp2 = tmp3 = _NULL
               while ((getline tmp3 < value) > 0) tmp2 = tmp2 tmp3 "\n"
               close(value)
               sub(/^\n+/,"",tmp2)	# strip leading newlines.
               sub(/\n+$/,"",tmp2)	# strip trailing newlines.
            }
            else tmp2 = _request("5_1",1)	# get as scalar.
            tmp2 = _xmldecode(tmp2)		# full post body.
         }

         #tmp2 = _xmldecode(tmp2)		# full post body.

         # Try and extract TW metadata from MetaWeblog title first.
         # The extended title is a list of "|"-separated fields; the
         # fields used further down are: 1 page name, 2 description,
         # 3 ranking/date, 4 author, 5 tags, 6 allowed groups, 7 link,
         # 8 store metadata, 9 expiration date.
         if ((tmp=mw["title"]) != _NULL) {
            split(tmp,twtitle,/ *\| */)
            tmp = twtitle[1]
         }
         else {
            # Otherwise try with Blogger's "<title>" meta-element if any.
            # Exactly two substitutions mean that both the opening and
            # the closing tag were found.
            tmp = tmp2		# load post body
            if (gsub(/(.*<title>|<\/title>.*)/,_NULL,tmp) != 2) tmp = _NULL
            else {
               tmp = _strip(tmp,_O_CRUSH)
               split(tmp,twtitle,/ *\| */)
               tmp = twtitle[1]
            }
         }

         # Get page name from post title if still missing
         # (which means that I was called as '*.newPost').
         if (tmp1 == _NULL) tmp1 = tmp

         if (tmp != _NULL && _request("0_0",1) ~ /\.newPost$/) {
            # Prepend group name to default page description if new post.
            if (api == "blogger") split(_request("2_1",1),a,".")
            else split(_request("1_1",1),a,".")	# MetaWeblog API
            # URI-encoded value not received over a URL,
            # so explicit un-encoding is necessary.
            a[2] = _uridecode(a[2],_O_PATHINFO)
            if (a[2] != _NULL) tmp = a[2] "/" tmp
         }

         # No way to set a name for the page being created ?
         # Then I need to issue a message slightly different
         # from the usual '1001', hence I issue it here.

         if (tmp1 == _NULL) return(_sys("csaExit.fault 1023"))

         # Try and extract category from MetaWeblog "category" element
         # first, then try with Blogger's "<category>" meta-element if any.

         if ((tmp3=mw["category"]) == _NULL && _request("0_0",1) != _NULL) {
            tmp3 = tmp2		# load post body
            if (gsub(/(.*<category>|<\/category>.*)/,_NULL,tmp3) != 2)
               tmp3 = _NULL
         }

         if (tmp3 != _NULL) {

            # If the client specified an explicit category for the page
            # I'll take that argument into account only if this is a new
            # page, otherwise the explicit category will be ignored because
            # an old page must retain whatever category it belongs into,
            # and the relevant value will be then taken from the page name.

            if (_request("0_0",1) ~ /\.newPost$/) {

               # The category name may not contain dots.
               if (tmp3 ~ /\./) return(_sys("csaExit.fault 1025"))

               # Since I've been passed an explicit category, I chop
               # any implicit category name off the page name and replace
               # it with the passed value. Any dots in page name after
               # the category name can safely be left in place.

               sub(/[^.]*\./,_NULL,tmp1)
               tmp1 = tmp3 "." tmp1
            }
         }
      }

      # Strip title and category from post, both for RPC2 and REST
      # (but they would not pass tidy(1) later-on anyway). Note how,
      # given the current mechanism, if either <title> ... </title>
      # or <category> ... </category> literal text is inserted in a page
      # through RESTful editing, such text will likely be interpreted as
      # the relevant meta-directives if the page is subsequently edited
      # with blogging API clients. This issue can be solved by entering
      # the opening bracket as "(:amp:)lt;" as opposed to literally,
      # like this: "(:amp:)lt;title>" .
      #
      # Both patterns are anchored on their opening tag, so that any
      # body text preceding the element is left in place.

      gsub(/<title>.*<\/title>/,_NULL,tmp2)
      gsub(/<category>.*<\/category>/,_NULL,tmp2)

      # Tell the underlying rc(1) program whether there's no content.
      # Note that we cannot test for tags here (as we do in showPage)
      # because the post may not yet contain any, depending on what
      # was used to edit it on the client side.

      #if (tmp2 !~ /[a-zA-Z0-9]/) _rcset("cgi.empty","1")
      # Strongly non-latin languages require a broader test.
      if (tmp2 ~ /^[ \t\n\r]*$/) _rcset("cgi.empty","1")

      # Store the parsed Wiki content.
      printf("%s",wikiFilter(tmp2,save)) > out

      gsub(/[ \t\n\r]/," ",tmp1)	# neutralize real junk.

      # Set actual page name, accounting for max allowed length.
      value = _strip(substr(tmp1,1,ENVIRON["TNS_PAGE_MAXLEN"]),_O_MIDDLE)

      # page name must be at least 2-character long.
      if (length(value) > 1) {

         _rcset("cgi.page.uri",_uriencode(value,_O_PATHINFO))
         tmp4 = value		# save for tags table.

         # Accept a page literal name longer than the actual page name.
         _rcset("cgi.page.literal",\
            _strip(substr(tmp1,1,ENVIRON["TNS_PAGE_MAXLEN"]+32),_O_MIDDLE))

         value = unixify(value,1)
         _rcset("cgi.page",value)

         # extract unixified page meta-category if available.
         if ((value=getcat(value)) != _NULL) _rcset("cgi.subcat",value)

         # extract literal page meta-category if available.
         value = _strip(substr(tmp1,1,ENVIRON["TNS_PAGE_MAXLEN"]),_O_MIDDLE)
         if ((value=getcat(value)) != _NULL)
            _rcset("cgi.subcat.literal",value)

         if (_request("0_0",1) != "blogger.deletePost") {

            # Optional RESTful page short title/description.

            if ((value=_request("6",1)) == _NULL) {

               # Blogger API 1.0: use the previously saved value if any,
               # and if no explicit description was supplied through
               # the TW post title's extended syntax.
               if (twtitle[2] == _NULL) value = tmp
               else value = twtitle[2]

               if (_request("0_0",1) != _NULL) {

                  # Blogger API 1.0 and MetaWeblog API: set page either
                  # hidden or visible, depending on the "publish" flag.
                  # A leading "-" in the description marks the page as
                  # hidden.
                  # NOTE(review): "5_1" is also where the Blogger API
                  # post body is read from above, so the second test
                  # looks at the body for Blogger calls -- confirm.

                  if (_bool(_request("6_1",1)) == _TRUE || \
                      _bool(_request("5_1",1)) == _TRUE)
                     sub(/^[- ]*/,_NULL,value)
                  else sub(/^[- ]*/,"-",value)
               }
            }

            if (value != _NULL) {
               # Accept twice as many characters if the page title
               # contains a redirection CPI.
               i = (value ~ /\(:redirect.*:\)/) ? 2 : 1
               value = substr(value,1,ENVIRON["TNS_PAGE_MAXTITLE"]*i)
               gsub(/[\t\r\n]+/," ",value)
               #sub(/^[- ]+$/,"",value)
               value = _strip(value,_O_MIDDLE)
               _rcset("cgi.page.descr",value)

               #tmp = value		# save again, for later.
            }
         }

         # Optional author name of this change. If no explicit author was
         # entered then try and get it from the relevant HTTP cookie if
         # available.

         if ((value=_request("7",1)) == _NULL) value = twtitle[4]
         if (value == _NULL) value = _request("twauthor",1)

         if (value != _NULL) {
            value = substr(value,1,32)
            gsub(/[\t\r\n<>]+/," ",value)
            value = _strip(value,_O_MIDDLE)

            # encode values that are to be sent in HTTP cookie headers.
            _rcset("cgi.author",value)
            _rcset("cgi.author.uri",_uriencode(value))
         }

         # Optional comma- or blank-separated list of authentication
         # groups who are granted read access to this page; if this
         # is not supplied then the page can be read by anyone.

         if ((value=_request("5",1)) == _NULL) value = twtitle[6]
         gsub(/ +/,",",value); gsub(/,+/,",",value)
         gsub(/^,+/,"",value); gsub(/,+$/,"",value)

         # remove any duplicated group names in user input.

         if ((i=split(value,a,","))) {
            value = _NULL
            _sort(a,i)
            i = _uniq(a,i)
            while (i > 0) {
               if (value == _NULL) value = a[i]
               else value = value "," a[i]
               i--
            }
         }

         # Authorization groups are like user IDs: they must always
         # be lower-case, or things may become messy.
         value = tolower(value)

         if (value ~ /^([a-z]+[a-z0-9]+,?)+$/) _rcset("cgi.allow",value)

         # Optional tags associated with this page. Multi-word tags are
         # supported, with individual words separated by an underscore "_".

         if ((value=_request("9",1)) == _NULL) value = twtitle[5]

         gsub(/["\t\r\n<>]+/," ",value)		# just in case
         gsub(/_+ +/," ",value); gsub(/__+/,"_",value)
         sub(/^[ _]+/,"",value); sub(/[ _]+$/,"",value)
         value = _strip(value,_O_MIDDLE)
         value = substr(value,1,256); sub(/[ _]+$/,"",value)

         # remove any duplicated tags in user input.

         if ((i=split(value,a," "))) {
            value = _NULL
            for (j in a) {
               # preserve casing if local or title tag.
               if (a[j] !~ /^[MT]:/) a[j] = tolower(a[j])
            }
            _sort(a,i)
            i = _uniq(a,i)
            while (i > 0) {

               # Keyword-type tags may optionally be prefixed with "k:" .
               # Loop over if nothing left after stripping "k:" .
               if (sub(/^k:/,"",a[i]) && a[i] == "") {
                  i--
                  continue
               }

               # Handle geo-tags if any (retain only the first one).
               # No checks for well-formedness are done here.
               # Format is: g:lat:long[:ISO_3166-2] .
               # Note that for the geo tag not to have to be re-entered
               # upon each edit session it will have to be saved
               # in the tag+dat along with all other tags.

               else if (a[i] ~ /^g:/) {
                  if (geo == "") geo = substr(a[i],3)
                  else a[i] = ""
               }

               # Handle local (i.e. "my") tag in a similar way. Again,
               # retain only the first one. Only basic sanity checks are
               # done here. Also the local tag is to be saved in tag+dat.
               # Note that no tags may contain spaces, or they will be
               # split into multiple unrelated tags. Like all tags, also
               # the local one must be entered with spaces replaced by
               # underscores. Unlike other tags, the local tag is not
               # turned into lower-case (see above); of course this
               # means that local tags that differ only in their casing
               # will produce separate entries in tag+dat.

               else if (a[i] ~ /^M:/) {
                  if (my == "") my = substr(a[i],3)
                  else a[i] = ""
               }

               if (value == _NULL) value = a[i--]
               else value = value " " a[i--]
            }
         }

         if (value ~ /^([MT]:)?./) {
            _rcset("cgi.tags",_strip(value))
            value = " " value " "
            # For distance searches to be possible (not yet implemented)
            # the geo tag must also be saved in tag+dat.
            #gsub(/ g:[^ ]+/,_NULL,value)
            value = _strip(value,_O_MIDDLE)
            _rcset("cgi.tags.xml",_xmlencode(value))
            gsub(" ","\t" unixify(tmp4,1) "\t%s\n",value)
            value = value "\t" unixify(tmp4,1) "\t%s\n"
            _rcset("cgi.tags.tbl",value)
         }

         # Optional Geo tag (see above). Note that tags are xml-encoded
         # here just in case they contain something weird, to ease the
         # job of the underlying shell procedure.

         if ((i=split(geo,a,":")) > 1) {
            for (j=1; j<=i; j++) {
               # probably irrelevant, but nevertheless ...
               if (j == 3) a[j] = toupper(a[j])
               _rcset("cgi.tags.geo",_xmlencode(a[j]),_NULL,_O_APPEND)
            }
         }

         # Optional date and time the content of the page relates to
         # (mainly for news items). The field may carry a leading
         # "ranking," prefix, where ranking is a single digit 1-9.

         if ((value=_strip(_request("8",1),_O_MIDDLE)) == _NULL)
            value = _strip(twtitle[3],_O_MIDDLE)

         tmp = value				# extract ranking
         sub(/ *,.*/,"",tmp)
         if (tmp !~ /^[1-9]$/) tmp = "0"	# default ranking
         sub(/^.*, */,"",value)

         # several tweaks on reldate field, to try and catch/fix
         # the most common mistakes.

         if (value !~ /:/)
            value = value " " _TIME["H"] ":" _TIME["M"] ":" _TIME["S"]
         sub(/[^0-9]+$/,_NULL,value)
         # insert leading "0" if missing in time spec.
         if (value ~ / [0-9]:/) sub(/ /," 0",value)
         # trim any trailing malformed time specs.
         while (value ~ /:[0-9]$/) sub(/:[0-9]$/,_NULL,value)
         # append trailing seconds if missing.
         if (value ~ / [0-2][0-9]:[0-9][0-9]$/) sub(/$/,":00",value)
         if (_isdate(value,1) == _TRUE) value = _localdate(value,1)
         value = _isodate(value)
         if (_isdate(value) == _TRUE) _rcset("cgi.reldate",tmp "," value)

         # Optional page expiration date and time (will apply also to any
         # page private attachments).

         if ((value=_strip(_request("13",1),_O_MIDDLE)) == _NULL)
            value = _strip(twtitle[9],_O_MIDDLE)

         # several tweaks on expdate field, to try and catch/fix
         # the most common mistakes.

         if (value != _NULL) {
            if (value !~ /:/)
               value = value " " _TIME["H"] ":" _TIME["M"] ":" _TIME["S"]
            sub(/[^0-9]+$/,_NULL,value)
            # insert leading "0" if missing in time spec.
            if (value ~ / [0-9]:/) sub(/ /," 0",value)
            # trim any trailing malformed time specs.
            while (value ~ /:[0-9]$/) sub(/:[0-9]$/,_NULL,value)
            # append trailing seconds if missing.
            if (value ~ / [0-2][0-9]:[0-9][0-9]$/) sub(/$/,":00",value)
            if (_isdate(value,1) == _TRUE) value = _localdate(value,1)

            value = _isodate(value)
            if (_isdate(value) == _TRUE) _rcset("cgi.expdate",value)
         }

         # Optional URL (loosely) associated with this page.
         if ((value=_request("10",1)) == _NULL && \
             (value=mw["link"]) == _NULL) value = twtitle[7]
         value = substr(_strip(value,_O_CRUSH),1,256)
         if (_isuri(value) == _TRUE) _rcset("cgi.link",value)

         # Optional store catalog metadata, see the specs.
         if ((value=_request("11",1)) == _NULL) value = twtitle[8]

         # Part nos. are turned into upper-case, and the whole
         # field content is sanitized, to prevent garbage that
         # certain spreadsheet programs used by the user to maintain
         # price lists may have inserted in numeric fields.

         value = toupper(substr(value,1,64))
         gsub(/[^-.: A-Z0-9,]/," ",value)
         value = _strip(value,_O_CRUSH)
         if (value != _NULL) {
            # apply basic sanity checks.
            gsub(/ *, */,",",value)
            #gsub(/:+/,":",value)	# probably NOT to be done.
            gsub(/,+/,",",value)
            if ((i=split(value,a," ")) > 3) i = 3	# ignore extra fields.
            # prepend a leading colon if necessary.
            if (a[1] !~ /:/) a[1] = ":" a[1]
            sub(/:+$/,"",a[2]); sub(/:+$/,"",a[3])
            # set price2 equal to price1 if unspecified.
            if (a[2] !~ /:/) a[2] = a[2] ":" a[2]
            value = _NULL
            for (j=1; j<=i; j++) value = value " " a[j]
            _rcset("cgi.store",_strip(value))
         }
      }

      # Toggle comments for this page. Comments are enabled/disabled if
      # the relevant selection is either "true" or "false" respectively,
      # otherwise they are left into whatever previous state. This is
      # to save me the hassle of modifying 'editPage' in order to pre-set
      # the relevant form value to show the current state.

      if ((value=_request("12",1)) ~ /^(true|false)$/)
         _rcset("cgi.comments",value)

      # 1st extra argument (current content checksum)
      value = _request("3",1)
      if (value ~ /^[a-zA-Z0-9]*$/) _rcset("cgi.checksum",value)

      # The following test is necessary since the address could,
      # at least in theory, have been set to any string by the
      # remote user, due to how it is handled to cope with stunnel(8)
      # and the lack of transproxy support in kernel 2.4.x.

      if (_isipaddr(ENVIRON["REMOTE_ADDR"]) == _TRUE)
         value = ENVIRON["REMOTE_ADDR"]
      else value = "0.0.0.0"

      _rcset("REMOTE_ADDR",value)

      # Next stuff to be done only if confirmed.

      if (_csa("confirmed") != _TRUE) return

      # These should never be null, but just in case ...
      # (host and port are only referenced by the disabled URL-parsing
      # code further down.)
      if (ENVIRON["SERVER_PORT"] == "") port = "80"
      else port = ENVIRON["SERVER_PORT"]

      if (ENVIRON["SERVER_NAME"] == "") host = ENVIRON["CSA_FQDN"]
      else host = ENVIRON["SERVER_NAME"]

      # Try and extract target URLs from content body, and build a
      # batch pingback script with them. This code makes no distinction
      # between local and remote URLs and treats both the same. That is,
      # both pings will be sent by using the pingback autodiscovery
      # protocol and will work only with those target pages that support
      # it. Also, since this code must work with both XHTML source and
      # parsewiki(1) content, it will take into account all URLs, even
      # those which are enclosed in XML comment blocks "<!-- ... -->" .

      value = tmp2		# restore saved body

      save = value		# save for later

      # source URL (i.e. ourselves).
      value = ENVIRON["CSA_REQUEST_URI"]
      gsub(/.UNKNOWN/,_NULL,value)	# can be /UNKNOWN?UNKNOWN

      if (value !~ /^[-_.a-zA-Z0-9%+\/&=?$;:,#]+$/)
         value = _uriencode(value)

      gsub(/%2[Ff]/,"/",value)		# un-encode slashes

      re = "^/" _escreg(ENVIRON["CSA_ID"]) "(/I)?"
      sub(re,_NULL,value)

      value = ENVIRON["CSA_RPC_URI"] value

      # Length of the request body minus the target URL, which is
      # added separately for each URL in the loop below; 207 is taken
      # as-is to be the size of the fixed XML text.
      len = 207 + length(_xmlencode(value))

      # No support for https URLs at the moment, because nc(1) does
      # not handle them and something like openssl(1) would be needed.
      # I may implement this in the future, although it is rather
      # uncommon that a pingback-server be reachable through SSL.

      i = split(save,a,/(http|HTTP):/)

      # destination URLs.

      print "# This program parses potentially insecure\n" \
            "# external arguments; for better security it\n" \
            "# must be run through the rc(1) shell, as opposed\n" \
            "# to Bourne sh(1) and derivatives.\n" > out1

      for (j=1; j<=i; j++) {

         # skip blatantly invalid URLs.
         if (a[j] !~ /^\/\/([a-zA-Z0-9][-a-zA-Z0-9]*\.)+/) continue

         # truncate at first really odd character.
         sub(/[ \t\r\n'"].*/,_NULL,a[j])

         # still garbage? skip URL.
         if (a[j] !~ /^[-_.a-zA-Z0-9%+\/&=?$;:,#]+$/) continue

         # Try and extract host, port and path from URI.
         # In fact, all this will have to be done by the
         # pb-discover utility, so I leave it out here.
         #
         #host = port = path = _NULL	# clear on each loop.
         #
         #host = a[j]
         #sub(/^\/\//,"",host)
         #sub(/[\/?:].*/,"",host)
         #
         #port = a[j]
         #sub(/^\/\//,"",port)
         #sub(/[\/?].*/,"",port)
         #if (sub(/.*:/,"",port) && port ~ /^[1-9][0-9]+$/);
         #else port = "80"
         #
         #path = a[j]
         #sub(/^\/\/[^\/?]+/,"",path)
         #path = _xmldecode(path)
         #if (path ~ /<>/) path = "/"

         print "nl='\n'\npbserver=``($nl){pb-discover " \
               _rcescape("http:" _xmldecode(a[j])) "}\n" \
               "if (~ $#pbserver 3) {		# need host,port,path\n" \
               "{echo POST $pbserver(3) HTTP/1.0\n" \
               "echo Host: $pbserver(1)\n" \
               "cat <<'EOF'\n" \
               "User-Agent: CSA/" ENVIRON["CSA_VERSION"] "\n" \
               "Content-Type: text/xml\n" \
               "Content-Length: " \
               (len + length("http:" _xmlencode(a[j]))) \
               "\n\n<?xml version=\"1.0\"?>\n" \
               "<methodCall>\n" \
               "<methodName>pingback.ping</methodName>\n" \
               "<params>\n<param>\n<value><string>" _xmlencode(value) \
               "</string></value>\n</param>\n<param>\n<value><string>" \
               _xmlencode("http:" a[j]) \
               "</string></value>\n</param>\n</params>\n" \
               "</methodCall>\nEOF\n}|" \
               "nc -w 10 $pbserver(1) $pbserver(2);sleep 1}\n" > out1
      }
   }

   else {		# response

      if (_csa("confirmed") == _TRUE) {

         out1 = _rcget("TNS_PINGBACK_QUEUE")

         # Ping supported remote indexing engines where appropriate.
         # Make sure the output file is appended to, as it may already
         # contain stuff written during _O_REQUEST !
         #
         # Add more special pingable services below as needed.

         if (_bool(ENVIRON["TNS_PING_TECHNORATI"]) == _TRUE)
            pingSpecial(out1,"technorati")
      }

      # generic template conditionals.

      ifsections()

      # Default page title to page name and set page subcat name if any.

      value = tmp = _rcget("tpl.var.tw.page",1)

      if ((tmp=getcat(tmp)) != _NULL) {
         _response("tpl.var.tw.page.subcat",tmp)
         sub(/[^.]+\./,_NULL,value)
         _html("show","tpl.if.tw.subcat","tpl.fi.tw.subcat")
      } else _html("hide","tpl.if.tw.subcat","tpl.fi.tw.subcat")

      # I cannot use the content of a possible "T:" tag here, because
      # it is not yet individually available in the underlying rc(1)
      # script at this stage. This isn't a problem though, because that
      # tag is mostly meant for showPage, so not using it here is OK.

      _response("tpl.var.html.title",value)
   }
}

# EOF