1	# =====================================================================
     2	# updatePage.awk: RPC I/O function for rpclib/updatePage.
     3	#
     4	# Copyright (c) 2007-2014 Carlo Strozzi
     5	#
     6	# This program is free software; you can redistribute it and/or modify
     7	# it under the terms of the GNU General Public License as published by
     8	# the Free Software Foundation; version 2 dated June, 1991.
     9	#
    10	# This program is distributed in the hope that it will be useful,
    11	# but WITHOUT ANY WARRANTY; without even the implied warranty of
    12	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13	# GNU General Public License for more details.
    14	#
    15	# You should have received a copy of the GNU General Public License
    16	# along with this program; if not, write to the Free Software
    17	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
    18	#
    19	# =====================================================================
    20	
    21	# =====================================================================
    22	# void _userproc(int mode)
    23	# =====================================================================
    24	
    25	function _userproc(mode,		value,uvalue,out,out1,j,api,\
    26						save,uri,a,i,tmp,tmp1,host,\
    27						port,path,re,len,tmp2,tmp3,\
    28						tmp4,twtitle,tmp5,mw,geo,my) {
    29	
    30	   if (mode == _O_REQUEST) {				# request.
    31	
    32	      nodestem()
    33	
    34	      # Output files are mandatory!
    35	      if ((out=_rcget("TNS_CMS_CONTENT")) !~ /^\//)
    36		 return(_sys("csaExit.fault 0041 TNS_CMS_CONTENT"))
    37	
    38	      if ((out1=_rcget("TNS_PINGBACK_QUEUE")) !~ /^\//)
    39		 return(_sys("csaExit.fault 0041 TNS_PINGBACK_QUEUE"))
    40	
    41	      # BloggerAPI 1.0
    42	      if (_request("0_0",1) ~ /^blogger\./) {
    43		 api = "blogger"
    44		 bloggerAuth()
    45	      }
    46	
    47	      # MetaWeblog API and derivatives.
    48	      else if (_request("0_0",1) != _NULL) {
    49		 api = "mw"
    50		 bloggerAuth("2_1","3_1","1_1")
    51	      }
    52	
    53	      # target group.
    54	      value = substr(_request("1",1),1,ENVIRON["TNS_GROUP_MAXLEN"])
    55	
    56	      # BloggerAPI 1.0 and MetaWeblog API
    57	      if (value == _NULL) {
    58		 if (api == "blogger") split(_request("2_1",1),a,"/")
    59		 else split(_request("1_1",1),a,"/")	# MetaWeblog API (default)
    60	
    61		 # Handle numeric page specification.
    62		 if (a[2] == _NULL && (a[1]/=1)) {
    63		    tmp = ENVIRON["CSA_ROOT"] "/var/nodes/" \
    64			substr(a[1],1,1) "/" substr(a[1],2,1) "/" \
    65			substr(a[1],3,1) "/" a[1]
    66	
    67		    getline value < tmp; close(tmp)
    68		    split(value,a,"/") 
    69		    _rcset("cgi.numeric","on")		# tell the caller.
    70		 }
    71	
    72		 if (_LANG[a[1]] != _NULL) _rcset("CSA_LANG",a[1])
    73	
    74		 # Target group name is expected to be a unixified string here.
    75		 value = a[2]
    76	      }
    77	
    78	      # group must not be null and it may not contain
    79	      # the ``.'' character.
    80	
    81	      if (value != _NULL && value !~ /\./) {
    82		 save = unixify(value)
    83		 _rcset("cgi.group",save)
    84		 _rcset("cgi.group.literal",value)
    85	
    86	         # Optional content editing GUI for this group.
    87	         # This is not necessarily equal to the one preferred
    88	         # by the user, but depends also on what is actually
    89	         # allowed for this group.
    90	
    91	         value = _request("twfilter_" ENVIRON["CSA_ID"] "_" save,1)
    92	
    93	         if (value ~ /^(parsewiki|rawhtml|tinymce|nicedit|ckeditor)$/) {
    94	            _rcset("cgi.filter",value)
    95	            _TNS_CMS_FILTER = value             # needed by wikiFilter()
    96	         }
    97	      }
    98	
    99	      # RESTful target page within group.
   100	      tmp1 = substr(_request("2",1),1,ENVIRON["TNS_PAGE_MAXLEN"])
   101	
   102	      # BloggerAPI 1.0 and MetaWeblog API: if no RESTful page name
   103	      # was supplied, try to get it through the API, in case this
   104	      # was a request for '*.editPost' and the target page name
   105	      # is expected to be an already unixified string.
   106	
   107	      if (tmp1 == _NULL && _request("0_0",1) != _NULL) tmp1 = a[3]
   108	
   109	      # Actual wiki page content. Large posts are treated like
   110	      # attachments by CSA, not to bloat the underlying shell
   111	      # script environment.
   112	
   113	      if (_request("4",1,_O_UPNAME) == "4" && \
   114		 (value=_request("4",1,_O_ATTACH)) != "/dev/null") {
   115	
   116		 tmp2 = tmp3 = _NULL
   117		 while (getline tmp3 < value > 0) tmp2 = tmp2 tmp3 "\n"
   118		 close(value)
   119		 sub(/\n$/,"",tmp2)		# strip last newline.
   120	      }
   121	
   122	      # Note that the next section of code is be entered only if this
   123	      # program was called either to create or to update a post, because
   124	      # deleting a post does not take any content for the post body,
   125	      # of course.
   126	
   127	      else if ((tmp2=_request("4",1)) == _NULL && \
   128				_request("0_0",1) != "blogger.deletePost") {
   129	
   130		 # BloggerAPI 1.0 and MetaWeblog API.
   131		 # If I was called as '*.editPost' then the target page should
   132		 # have been provided in the call, but if it wasn't then I'll
   133		 # try and build the target page name myself, based on the
   134		 # <title> element of the input data, if any. By doing so, I am
   135		 # actually making no distinction between '*.newPost' and
   136		 # '*.editPost', and this should simplify things quite a bit.
   137		 # If the blogging client requests an old post for editing and
   138		 # then changes the page title before submitting the changes,
   139		 # she will effectively be creating a new post. This is intentional
   140		 # and it is in line with the normal RESTful operation of TW.
   141	
   142		 if (api == "mw") {
   143	
   144		    # I only handle the "title", "categories" and "description"
   145		    # RSS 2.0 elements. For the "category" element I'll take
   146		    # only the first value of the array. Also, I disregard any
   147		    # "dateCreated" and "pubDate" elements because, beside the
   148		    # fact that they are received in UTC time, the creation date
   149		    # is computed automatically by TW while the "logical" date
   150		    # (vtime) is only allowed to be set through the TW extended
   151		    # title syntax. If I ever wanted to support them, it can
   152		    # be done by converting them from metaWeblog format
   153		    # (i.e. 20090611T08:15:00Z) to GNU date(1) format (that is
   154		    # "20090611 08:15:00+00" and then computing the corresponding
   155		    # local values with "date -d '20090611 08:15:00+00'" .
   156		    # Neither "dateCreated" nor "pubDate" will actually modify
   157		    # the page creation date, which must never be changed, but
   158		    # only the "logical" page date (vtime). If both "pubDate"
   159		    # and "dateCreated" were specified, then the former should
   160		    # take over.
   161	
   162		    for (i=1; (tmp5=_request("4_1_" i "_N",1)) != _NULL; i++) {
   163	
   164			if (tmp5 == "title")
   165			   mw["title"] = _strip(_request("4_1_" i,1),_O_CRUSH)
   166	
   167			else if (tmp5 == "categories") {
   168			   mw["category"] = _strip(_request("4_1_" i "_1"),_O_CRUSH)
   169	
   170			   # Issue an error message if multiple categories were
   171			   # specified by the client, as TW does not allow that.
   172			   # I could simply ignore all the specified categories
   173			   # but the first one, but I prefer not to lead the
   174			   # user into believing that her request to post to
   175			   # multiple categories has succeeded.
   176	
   177			   if (_strip(_request("4_1_" i "_2"),_O_CRUSH) != _NULL)
   178							_sys("csaExit.fault 1036")
   179			}
   180			else if (tmp5 == "description") {
   181			  if (_request("4_1_" i "_1",1,_O_UPNAME) == "4_1_" i "_1" \
   182				&& (value=_request("4_1_" i "_1",1,_O_ATTACH)) \
   183				!= "/dev/null") {
   184	
   185			     tmp2 = tmp3 = _NULL
   186			     while (getline tmp3 < value > 0) tmp2 = tmp2 tmp3 "\n"
   187			     close(value)
   188			     sub(/^\n+/,"",tmp2)	# strip leading newlines.
   189			     sub(/\n+$/,"",tmp2)	# strip trailing newlines.
   190			  }
   191			  else tmp2 = _request("4_1_" i,1)	# get as scalar.
   192			}
   193			else if (tmp5 == "link") mw["link"] = value
   194		    }
   195		    tmp2 = _xmldecode(tmp2)		# full post body.
   196		 }
   197	
   198		 # Blogger API.
   199		 else {
   200		    if (_request("5_1",1,_O_UPNAME) == "5_1" && \
   201			(value=_request("5_1",1,_O_ATTACH)) != "/dev/null") {
   202	
   203		       tmp2 = tmp3 = _NULL
   204		       while (getline tmp3 < value > 0) tmp2 = tmp2 tmp3 "\n"
   205		       close(value)
   206		       sub(/^\n+/,"",tmp2)		# strip leading newlines.
   207		       sub(/\n+$/,"",tmp2)		# strip trailing newlines.
   208		    }
   209		    else tmp2 = _request("5_1",1)	# get as scalar.
   210		    tmp2 = _xmldecode(tmp2)		# full post body.
   211		 }
   212	
   213		 #tmp2 = _xmldecode(tmp2)		# full post body.
   214	
   215		 # Try and extract TW metadata from MetaWeblog title first.
   216		 if ((tmp=mw["title"]) != _NULL) {
   217		    split(tmp,twtitle,/ *\| */)
   218		    tmp = twtitle[1]
   219		 }
   220		 else {
   221		    # Otherwise try with Blogger's "<title>" meta-element if any.
   222		    tmp = tmp2				# load post body
   223		    if (gsub(/(.*<title>|<\/title>.*)/,_NULL,tmp) != 2) tmp = _NULL
   224		    else {
   225		      tmp = _strip(tmp,_O_CRUSH)
   226		      split(tmp,twtitle,/ *\| */)
   227		      tmp = twtitle[1]
   228		    }
   229		 }
   230	
   231		 # Get page name from post title if still missing
   232		 # (which means that I was called as '*.newPost').
   233		 if (tmp1 == _NULL) tmp1 = tmp
   234	
   235		 if (tmp != _NULL && _request("0_0",1) ~ /\.newPost$/) {
   236		    # Prepend group name to default page description if new post.
   237		    if (api == "blogger") split(_request("2_1",1),a,".")
   238		    else split(_request("1_1",1),a,".")		# MetaWeblog API
   239		    # URI-encoded value not received over a URL,
   240		    # so explicit un-encoding is necessary.
   241		    a[2] = _uridecode(a[2],_O_PATHINFO)
   242		    if (a[2] != _NULL) tmp = a[2] "/" tmp
   243		 }
   244	
   245		 # No way to set a name for the page being created ?
   246		 # Then I need to issue a message slightly different
   247		 # from the usual '1001', hence I issue it here.
   248	
   249		 if (tmp1 == _NULL) return(_sys("csaExit.fault 1023"))
   250	
   251		 # Try and extract category from MetaWeblog "category" element
   252		 # first, then try with Blogger's "<category>" meta-element if any.
   253	
   254		 if ((tmp3=mw["category"]) == _NULL && _request("0_0",1) != _NULL) {
   255		    tmp3 = tmp2				# load post body
   256		    if (gsub(/(.*<category>|<\/category>.*)/,_NULL,tmp3) != 2)
   257								     tmp3 = _NULL
   258		 }
   259	
   260		 if (tmp3 != _NULL) {
   261	
   262		    # If the client specified an explicit category for the page
   263		    # I'll take that argument into account only if this is a new
   264		    # page, otherwise the explicit category will be ignored because
   265		    # an old page must retain whatever category it belongs into,
   266		    # and the relevant value will be then taken from the page name.
   267	
   268		    if (_request("0_0",1) ~ /\.newPost$/) {
   269	
   270		       # The category name may not contain dots.
   271		       if (tmp3 ~ /\./) return(_sys("csaExit.fault 1025"))
   272	
   273		       # Since I've been passed an explicit category, I chop
   274		       # any implicit category name off the page name and replace
   275		       # it with the passed value. Any dots in page name after
   276		       # the category name can safely be left in place.
   277	
   278		       sub(/[^.]*\./,_NULL,tmp1)
   279		       tmp1 = tmp3 "." tmp1
   280		    }
   281		 }
   282	      }
   283	
   284	      # Strip title and category from post, both for RPC2 and REST
   285	      # (but they would not pass tidy(1) later-on anyway). Note how,
   286	      # given the current mechanism, if either <title> ... </title>
   287	      # or <category> ... </category> literal text is inserted in a page
   288	      # through RESTful editing, such text will likey be interpreted as
   289	      # the relevant meta-directives if the page is subsequently edited
   290	      # with blogging API clients. This issue can be solved by entering
   291	      # the opening bracket as "(:amp:)lt;" as opposed to literally,
   292	      # like this: "(:amp:)lt;title>" .
   293	
   294	      gsub(/<title>.*<\/title>/,_NULL,tmp2)
   295	      gsub(/<category>.*<\/category>/,_NULL,tmp2)
   296	
   297	      # Tell the underlying rc(1) program whether there's no content.
   298	      # Note that we cannot test for tags here (as we do in showPage)
   299	      # because the post may not yet contain any, depending on what
   300	      # was used to edit it on the client side.
   301	
   302	      #if (tmp2 !~ /[a-zA-Z0-9]/) _rcset("cgi.empty","1")
   303	      # Strongly non-latin languages require a broader test.
   304	      if (tmp2 ~ /^[ \t\n\r]*$/) _rcset("cgi.empty","1")
   305	
   306	      # Store the parsed Wiki content.
   307	      printf("%s",wikiFilter(tmp2,save)) > out
   308	
   309	      gsub(/[ \t\n\r]/," ",tmp1)		# neutralize real junk.
   310	
   311	      # Set actual page name, accounting for max allowed length.
   312	      value = _strip(substr(tmp1,1,ENVIRON["TNS_PAGE_MAXLEN"]),_O_MIDDLE)
   313	
   314	      # page name must be at least 2-character long.
   315	      if (length(value) > 1) {
   316	
   317		 _rcset("cgi.page.uri",_uriencode(value,_O_PATHINFO))
   318		 tmp4 = value			# save for tags table.
   319	
   320		 # Accept a page literal name longer than the actual page name.
   321		 _rcset("cgi.page.literal",\
   322		   _strip(substr(tmp1,1,ENVIRON["TNS_PAGE_MAXLEN"]+32),_O_MIDDLE))
   323	
   324		 value = unixify(value,1)
   325		 _rcset("cgi.page",value)
   326	
   327		 # extract unixified page meta-category if available.
   328		 if ((value=getcat(value)) != _NULL) _rcset("cgi.subcat",value)
   329	
   330		 # extract literal page meta-category if available.
   331		 value = _strip(substr(tmp1,1,ENVIRON["TNS_PAGE_MAXLEN"]),_O_MIDDLE)
   332		 if ((value=getcat(value)) != _NULL)
   333						_rcset("cgi.subcat.literal",value)
   334	
   335		 if (_request("0_0",1) != "blogger.deletePost") {
   336	
   337		    # Optional RESTful page short title/description.
   338	
   339		    if ((value=_request("6",1)) == _NULL) {
   340	
   341		       # Blogger API 1.0: use the previously saved value if any,
   342		       # and if no explicit description was supplied through
   343		       # the TW post title's extended syntax.
   344		       twtitle[2] == _NULL ? value = tmp : value = twtitle[2]
   345	
   346		       if (_request("0_0",1) != _NULL) {
   347	
   348			  # Blogger API 1.0 and MetaWeblog API: set page either
   349			  # hidden or visible, depending on the "publish" flag.
   350	
   351			  if (_bool(_request("6_1",1)) == _TRUE || \
   352					_bool(_request("5_1",1)) == _TRUE)
   353						 sub(/^[- ]*/,_NULL,value)
   354			  else sub(/^[- ]*/,"-",value)
   355		       }
   356		    }
   357	
   358		    if (value != _NULL) {
   359		       # Accept twice as many characters if the page title
   360		       # contains a redirection CPI.
   361		       value ~ /\(:redirect.*:\)/ ? i=2 : i=1
   362		       value = substr(value,1,ENVIRON["TNS_PAGE_MAXTITLE"]*i)
   363		       gsub(/[\t\r\n]+/," ",value)
   364		       #sub(/^[- ]+$/,"",value)
   365		       value = _strip(value,_O_MIDDLE)
   366		       _rcset("cgi.page.descr",value)
   367	
   368		       #tmp = value			# save again, for later.
   369		    }
   370		 }
   371	
   372		 # Optional author name of this change. If no explicit author was
   373		 # entered then try and get it from the relevant HTTP cookie if
   374		 # available,
   375	
   376		 if ((value=_request("7",1)) == _NULL) value = twtitle[4]
   377		 if (value == _NULL) value = _request("twauthor",1)
   378	
   379		 if (value != _NULL) {
   380		    value = substr(value,1,32)
   381		    gsub(/[\t\r\n<>]+/," ",value)
   382		    value = _strip(value,_O_MIDDLE)
   383	
   384		    # encode values that are to be sent in HTTP cookie headers.
   385		    _rcset("cgi.author",value)
   386		    _rcset("cgi.author.uri",_uriencode(value))
   387		 }
   388	
   389		 # Optional comma- or blank-separated list of authentication
   390		 # groups who are granted read access to this page; if this
   391		 # is not supplied then the page can be read by anyone.
   392	
   393		 if ((value=_request("5",1)) == _NULL) value = twtitle[6]
   394		 gsub(/ +/,",",value); gsub(/,+/,",",value)
   395		 gsub(/^,+/,"",value); gsub(/,+$/,"",value)
   396	
   397		 # remove any duplicated group names in user input.
   398	
   399		 if ((i=split(value,a,","))) {
   400		    value = _NULL
   401		    _sort(a,i)
   402		    i = _uniq(a,i)
   403		    while (i > 0)  {
   404		      if (value == _NULL) value = a[i]
   405		      else value = value "," a[i]
   406		      i--
   407		    }
   408		 }
   409	
   410	
   411		 # Authorization groups are like user IDs: they must always
   412		 # be lower-case, or things may become messy.
   413		 value = tolower(value)
   414	
   415		 if (value ~ /^([a-z]+[a-z0-9]+,?)+$/) _rcset("cgi.allow",value)
   416	
   417		 # Optional tags associated with this page. Multi-word tags are
   418		 # supported, with individual words separated by an underscore "_".
   419	
   420		 if ((value=_request("9",1)) == _NULL) value = twtitle[5]
   421	
   422		 gsub(/["\t\r\n<>]+/," ",value)			# just in case
   423		 gsub(/_+ +/," ",value); gsub(/__+/,"_",value)
   424		 sub(/^[ _]+/,"",value); sub(/[ _]+$/,"",value)
   425		 value = _strip(value,_O_MIDDLE)
   426		 value = substr(value,1,256); sub(/[ _]+$/,"",value)
   427	
   428		 # remove any duplicated tags in user input.
   429	
   430		 if ((i=split(value,a," "))) {
   431		    value = _NULL
   432		    for (j in a) {
   433			# preserve casing if local or title tag.
   434			if (a[j] !~ /^[MT]:/) a[j] = tolower(a[j])
   435		    }
   436		    _sort(a,i)
   437		    i = _uniq(a,i)
   438		    while (i > 0)  {
   439	
   440		      # Keyword-type tags may optionally be prefixed with "k:" .
   441		      # Loop over if nothing left after stripping "k:" .
   442		      if (sub(/^k:/,"",a[i]) && a[i] == "") {
   443			 i--
   444			 continue
   445		      }
   446	
   447		      # Handle geo-tags if any (retain only the first one).
   448		      # No checks for well-formedness are done here.
   449		      # Format is:  g:lat:long[:ISO_3166-2] .
   450		      # Note that for the geo tag not to have to be re-entered
   451		      # upon each edit session it will have to be saved
   452		      # in the tag+dat along with all other tags.
   453	
   454		      else if (a[i] ~ /^g:/) {
   455			 if (geo == "") geo = substr(a[i],3)
   456			 else a[i] = ""
   457		      }
   458	
   459		      # Handle local (i.e. "my") tag in a similar way. Again,
   460		      # retain only the first one. Only basic sanity checks are
   461		      # done here. Also the local tag is to be saved in tag+dat.
   462		      # Note that no tags may contain spaces, or they will be
   463		      # split into multiple unrelated tags. Like all tags, also
   464		      # the local one must be entered with spaces replaced by
   465		      # underscores. Unlike other tags, the local tag is not
   466		      # turned into lower-case (see above); of course this
   467		      # means that local tags that differ only in their casing 
   468		      # will produce separate entries in tag+dat.
   469	
   470		      else if (a[i] ~ /^M:/) {
   471			 if (my == "") my = substr(a[i],3)
   472			 else a[i] = ""
   473		      }
   474	
   475		      if (value == _NULL) value = a[i--]
   476		      else value = value " " a[i--]
   477		    }
   478		 }
   479	
   480		 if (value ~ /^([MT]:)?./) {
   481		    _rcset("cgi.tags",_strip(value))
   482		    value = " " value " "
   483		    # For distance searches to be possible (not yet implemented)
   484		    # the geo tag must also be saved in tag+dat.
   485		    #gsub(/ g:[^ ]+/,_NULL,value)
   486		    value = _strip(value,_O_MIDDLE)
   487		    _rcset("cgi.tags.xml",_xmlencode(value))
   488		    gsub(" ","\t" unixify(tmp4,1) "\t%s\n",value)
   489		    value = value "\t" unixify(tmp4,1) "\t%s\n"
   490		    _rcset("cgi.tags.tbl",value)
   491		 }
   492	
   493		 # Optional Geo tag (see above). Note that tags are xml-encoded
   494		 # here just in case they contain something weird, to ease the
   495		 # job of the underlying shell procedure.
   496	
   497		 if ((i=split(geo,a,":")) > 1) {
   498		    for (j=1; j<=i; j++) {
   499			# probably irrelevant, but nevertheless ...
   500			if (j == 3) a[j] = toupper(a[j])
   501			_rcset("cgi.tags.geo",_xmlencode(a[j]),_NULL,_O_APPEND)
   502		    }
   503		 }
   504	
   505		 # Optional date and time the content of the page relates to
   506		 # (mainly for news items).
   507	
   508		 if ((value=_strip(_request("8",1),_O_MIDDLE)) == _NULL)
   509					value = _strip(twtitle[3],_O_MIDDLE)
   510	
   511		 tmp = value				# extract ranking
   512		 sub(/ *,.*/,"",tmp)
   513		 if (tmp !~ /^[1-9]$/) tmp = "0"	# default ranking
   514		 sub(/^.*, */,"",value)
   515	
   516		 # several tweaks on reldate field, to try and catch/fix
   517		 # the most common mistakes.
   518	
   519		 if (value !~ /:/)
   520		 	value = value " " _TIME["H"] ":" _TIME["M"] ":" _TIME["S"]
   521		 sub(/[^0-9]+$/,_NULL,value)
   522		 # insert leading "0" if missing in time spec.
   523		 if (value ~ / [0-9]:/) sub(/ /," 0",value)
   524		 # trim any leading malformed time specs.
   525		 while (value ~ /:[0-9]$/) sub(/:[0-9]$/,_NULL,value)
   526		 # append trailing seconds if missing.
   527		 if (value ~ / [0-2][0-9]:[0-9][0-9]$/) sub(/$/,":00",value)
   528		 if (_isdate(value,1) == _TRUE) value = _localdate(value,1)
   529		 value = _isodate(value)
   530		 if (_isdate(value) == _TRUE) _rcset("cgi.reldate",tmp "," value)
   531	
   532		 # Optional page expiration date and time (will apply also to any
   533		 # page private attachments).
   534	
   535		 if ((value=_strip(_request("13",1),_O_MIDDLE)) == _NULL)
   536					value = _strip(twtitle[9],_O_MIDDLE)
   537	
   538		 # several tweaks on expdate field, to try and catch/fix
   539		 # the most common mistakes.
   540	
   541		 if (value != _NULL) {
   542		    if (value !~ /:/)
   543		 	value = value " " _TIME["H"] ":" _TIME["M"] ":" _TIME["S"]
   544		    sub(/[^0-9]+$/,_NULL,value)
   545		    # insert leading "0" if missing in time spec.
   546		    if (value ~ / [0-9]:/) sub(/ /," 0",value)
   547		    # trim any leading malformed time specs.
   548		    while (value ~ /:[0-9]$/) sub(/:[0-9]$/,_NULL,value)
   549		    # append trailing seconds if missing.
   550		    if (value ~ / [0-2][0-9]:[0-9][0-9]$/) sub(/$/,":00",value)
   551		    if (_isdate(value,1) == _TRUE) value = _localdate(value,1)
   552	
   553		    value = _isodate(value)
   554		    if (_isdate(value) == _TRUE) _rcset("cgi.expdate",value)
   555		 }
   556	
   557		 # Optional URL (loosely) associated with this page.
   558		 if ((value=_request("10",1)) == _NULL && \
   559			(value=mw["link"]) == _NULL) value = twtitle[7]
   560		 value = substr(_strip(value,_O_CRUSH),1,256)
   561		 if (_isuri(value) == _TRUE) _rcset("cgi.link",value)
   562	
   563		 # Optional store catalog metadata, see the specs.
   564		 if ((value=_request("11",1)) == _NULL) value = twtitle[8]
   565	
   566		 # Part nos. are turned into upper-case, and the whole
   567		 # field content is sanitized, to prevent garbage that
   568		 # certain spreadsheet programs used by the user to maintain
   569		 # price lists may have inserted in numeric fields.
   570	
   571		 value = toupper(substr(value,1,64))
   572		 gsub(/[^-.: A-Z0-9,]/," ",value)
   573		 value = _strip(value,_O_CRUSH)
   574		 if (value != _NULL) {
   575		    # apply basic sanity checks.
   576		    gsub(/ *, */,",",value)
   577		    #gsub(/:+/,":",value)	# probably NOT to be done.
   578		    gsub(/,+/,",",value)
   579		    if ((i=split(value,a," ")) > 3) i = 3	# ignore extra fields.
   580		    # prepend a leading colon if necessary.
   581		    if (a[1] !~ /:/) a[1] = ":" a[1]
   582		    sub(/:+$/,"",a[2]); sub(/:+$/,"",a[3]);
   583		    # set price2 equal to price1 if unspecified.
   584		    if (a[2] !~ /:/) a[2] = a[2] ":" a[2]
   585		    value = _NULL
   586		    for (j=1; j<=i; j++) value = value " " a[j]
   587		    _rcset("cgi.store",_strip(value))
   588		 }
   589	      }
   590	
   591	      # Toggle comments for this page. Comments are enabled/disabled if
   592	      # the relevant selection is either "true" or "false" respectively,
   593	      # otherwise they are left into whatever previous state. This is
   594	      # to save me the hassle of modifying 'editPage' in order to pre-set
   595	      # the relevant form value to show the current state.
   596	
   597	      if ((value=_request("12",1)) ~ /^(true|false)$/)
   598					_rcset("cgi.comments",value)
   599	
   600	      # 1st extra argument (current content checksum)
   601	      value = _request("3",1)
   602	      if (value ~ /^[a-zA-Z0-9]*$/) _rcset("cgi.checksum",value)
   603	
   604	      # The following test is necessary since the address could,
   605	      # at least in theory, have been set to any string by the
   606	      # remote user, due to how it is handled to cope with stunnel(8)
   607	      # and the lack of transproxy support in kernel 2.4.x.
   608	
   609	      if (_isipaddr(ENVIRON["REMOTE_ADDR"]) == _TRUE)
   610					value = ENVIRON["REMOTE_ADDR"]
   611	      else value = "0.0.0.0"
   612	
   613	      _rcset("REMOTE_ADDR",value)
   614	
   615	      # Next stuff to be done only if confirmed.
   616	
   617	      if (_csa("confirmed") != _TRUE) return
   618	
   619	      # These should never be null, but just in case ...
   620	      if (ENVIRON["SERVER_PORT"] == "") port = "80"
   621	      else port = ENVIRON["SERVER_PORT"]
   622	
   623	      if (ENVIRON["SERVER_NAME"] == "") host = ENVIRON["CSA_FQDN"]
   624	      else host = ENVIRON["SERVER_NAME"]
   625	
   626	      # Try and extract target URLs from content body, and build a
   627	      # batch pingback script with them. This code makes no distinction
   628	      # between local and remote URLs and treats both the same. That is,
   629	      # both pings will be sent by using the pingback autodiscovery
   630	      # protocol and will work only with those target pages that support
   631	      # it. Also, since this code must work with both XHTML source and
   632	      # parsewiki(1) content, it will take into account all URLs, even
   633	      # those which are enclosed in XML comment blocks "<!-- ... ->" .
   634	
   635	      value = tmp2				# restore saved body
   636	
   637	      save = value				# save for later
   638	
   639	      # source URL (i.e. ourselves).
   640	      value = ENVIRON["CSA_REQUEST_URI"]
   641	      gsub(/.UNKNOWN/,_NULL,value)		# can be /UNKNOWN?UNKNOWN
   642	
   643	      if (value !~ /^[-_.a-zA-Z0-9%+\/&=?$;:,#]+$/)
   644					value = _uriencode(value)
   645	
   646	      gsub(/%2[Ff]/,"/",value)			# un-encode slashes
   647	
   648	      re = "^/" _escreg(ENVIRON["CSA_ID"]) "(/I)?"
   649	      sub(re,_NULL,value)
   650	
   651	      value = ENVIRON["CSA_RPC_URI"] value
   652	
   653	      len = 207 + length(_xmlencode(value))
   654	
   655	      # No support for https URLs at the moment, because nc(1) does
   656	      # not handle them and something like openssl(1) would be needed.
   657	      # I may implemented this the future, although it is rather
   658	      # uncommon that a pingback-server be reachable through SSL.
   659	
   660	      i = split(save,a,/(http|HTTP):/)
   661	
   662	      # destination URLs.
   663	
   664	      print "# This program parses potentially insecure\n" \
   665		    "# external arguments; for better security it\n" \
   666		    "# must be run through the rc(1) shell, as opposed\n" \
   667		    "# to Bourne sh(1) and derivatives.\n" > out1
   668	
   669	      for (j=1; j<=i; j++) {
   670	
   671		  # skip blatantly invalid URLs.
   672		  if (a[j] !~ /^\/\/([a-zA-Z0-9][-a-zA-Z0-9]*\.)+/) continue
   673	
   674		  # truncate at first really odd character.
   675		  sub(/[ \t\r\n'"].*/,_NULL,a[j])
   676	
   677		  # still garbage? skip URL.
   678		  if (a[j] !~ /^[-_.a-zA-Z0-9%+\/&=?$;:,#]+$/) continue
   679	
   680		  # Try and extract host, port and path from URI.
   681		  # In fact, all this will have to be done by the
   682		  # pb-discover utility, so I leave it out here.
   683		  #
   684		  #host = port = path = _NULL		# clear on each loop.
   685		  #
   686		  #host = a[j]
   687		  #sub(/^\/\//,"",host)
   688		  #sub(/[\/?:].*/,"",host)
   689		  #
   690		  #port = a[j]
   691		  #sub(/^\/\//,"",port)
   692		  #sub(/[\/?].*/,"",port)
   693		  #if (sub(/.*:/,"",port) && port ~ /^[1-9][0-9]+$/);
   694		  #else port = "80"
   695		  #
   696		  #path = a[j]
   697		  #sub(/^\/\/[^\/?]+/,"",path)
   698		  #path = _xmldecode(path)
   699		  #if (path ~ /<>/) path = "/"
   700	
   701		  print "nl='\n'\npbserver=``($nl){pb-discover " \
   702			_rcescape("http:" _xmldecode(a[j])) "}\n" \
   703			"if (~ $#pbserver 3) {	# need host,port,path\n" \
   704			"{echo POST $pbserver(3) HTTP/1.0\n" \
   705			"echo Host: $pbserver(1)\n" \
   706			"cat <<'EOF'\n" \
   707			"User-Agent: CSA/" ENVIRON["CSA_VERSION"] "\n" \
   708			"Content-Type: text/xml\n" \
   709			"Content-Length: " \
   710			len + length("http:" _xmlencode(a[j])) \
   711			"\n\n<?xml version=\"1.0\"?>\n" \
   712			"<methodCall>\n" \
   713			"<methodName>pingback.ping</methodName>\n" \
   714			"<params>\n<param>\n<value><string>" _xmlencode(value) \
   715			"</string></value>\n</param>\n<param>\n<value><string>" \
   716			_xmlencode("http:" a[j]) \
   717			"</string></value>\n</param>\n</params>\n" \
   718			"</methodCall>\nEOF\n}|" \
   719			"nc -w 10 $pbserver(1) $pbserver(2);sleep 1}\n" > out1
   720	      }
   721	   }
   722	
   723	   else {					# response
   724	
   725	      if (_csa("confirmed") == _TRUE) {
   726	
   727		 out1 = _rcget("TNS_PINGBACK_QUEUE")
   728	
   729		 # Ping supported remote indexing engines where appropriate.
   730		 # Make sure the output file is appended to, as it may already
   731		 # contain stuff written during _O_REQUEST !
   732		 #
   733		 # Add more special pingable services below as needed.
   734	
   735		 if (_bool(ENVIRON["TNS_PING_TECHNORATI"]) == _TRUE)
   736	      					pingSpecial(out1,"technorati")
   737	      }
   738	
   739	      # generic template conditionals.
   740	
   741	      ifsections()
   742	
   743	      # Default page title to page name and set page subcat name if any.
   744	
   745	      value = tmp = _rcget("tpl.var.tw.page",1)
   746	
   747	      if ((tmp=getcat(tmp)) != _NULL) {
   748		 _response("tpl.var.tw.page.subcat",tmp)
   749		 sub(/[^.]+\./,_NULL,value)
   750		 _html("show","tpl.if.tw.subcat","tpl.fi.tw.subcat")
   751	      }  else _html("hide","tpl.if.tw.subcat","tpl.fi.tw.subcat")
   752	
   753	      # I cannot use the content of a possible "T:" tag here, because
   754	      # it is not yet individually available in the underlying rc(1)
   755	      # script at this stage. This isn't a problem though, because that
   756	      # tag is mostly meant for showPage, so not using it here is OK.
   757	
   758	      _response("tpl.var.html.title",value)
   759	   }
   760	} 
   761	
   762	# EOF