# =====================================================================
# rssFeed.awk: RPC I/O function for rpclib/rssFeed.
#
# Copyright (c) 2007,2008,2009,2010 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# =====================================================================

# =====================================================================
# void _userproc(int mode)
#
# RPC I/O hook for the RSS feed view.
#
# mode == _O_REQUEST:
#   Reads the positional request arguments "1" (group), "2" (special
#   'tw-' page name) and "3" (optional page), validates them and stores
#   the derived values through _rcset() under the "cgi.*" keys; also
#   stores a sanitized REMOTE_ADDR.
#
# any other mode (response):
#   Walks the tab-separated records in _TBL[1.._TBL[0]] twice and emits
#   the feed through _mrwresponse(): first the channel header with the
#   rdf:Seq index of item URLs, then one item per record.
#
# Record fields, as documented by the original author:
#   a[1] p_uri, a[2] p_[m,v]time, a[3] p_name, a[4] p_modau,
#   a[5] p_descr, a[6] k_page (optional)
#
# All parameters after 'mode' are local variables; callers pass only
# 'mode'.
# =====================================================================

function _userproc(mode,    value,url,a,b,i,j,title,g_uri,\
                            p_name,page_dir,page,tmp,x,base) {

    if (mode == _O_REQUEST) {                   # request.

        # target group
        value = _request("1",1)

        # group must not be null and it may not contain the
        # unescaped ``.'' character.

        if (value != _NULL && value !~ /\./) {
            _rcset("cgi.group",unixify(value))
            _rcset("cgi.group.literal",value)
        }

        # special page name corresponding to the desired view.
        value = _request("2",1)

        # special page names must begin with 'tw-'.

        if (value ~ /^tw-[a-z][-a-z0-9]+-recent-(pages|headlines)$/) {
            # extract subcat from cgi.tw.page if cgi.subcat is empty (this
            # is necessary to have the "Print" action do the right thing).
            # What is left in 'value' after stripping the 'tw-' prefix
            # and the '-recent-*' suffix is the subcat.
            sub(/^tw-/,"",value)
            if (sub(/-recent-headlines$/,"",value))
                _rcset("cgi.tw.page","tw-recent-headlines")
            else if (sub(/-recent-pages$/,"",value))
                _rcset("cgi.tw.page","tw-recent-pages")
            _rcset("cgi.subcat",value)
        }
        else if (value ~ /^tw-/)
            _rcset("cgi.tw.page",unixify(value,1))

        # optional target page within group (required by some views).
        value = _request("3",1)

        # page name must be at least 2 characters long.
        if (length(value) > 1) {
            _rcset("cgi.page.literal",value)
            _rcset("cgi.page.uri",_uriencode(value))
            value = unixify(value,1)
            _rcset("cgi.page",value)

            # Extract page meta-category if available. This will override
            # any subcat extracted from cgi.tw.page (see above).
            if ((value=getcat(value)) != _NULL) _rcset("cgi.subcat",value)
        }

        # The following test is necessary since the address could,
        # at least in theory, have been set to any string by the
        # remote user, due to how it is handled to cope with stunnel(8)
        # and the lack of transproxy support in kernel 2.4.x.

        if (_isipaddr(ENVIRON["REMOTE_ADDR"]) == _TRUE)
            value = ENVIRON["REMOTE_ADDR"]
        else value = "0.0.0.0"

        _rcset("REMOTE_ADDR",value)
    }

    else {                                      # response.

        # Prepare empty array for _mrwresponse() (see below).
        # This is already a local var, but I want to make clear
        # that it is an array, for documentational purposes.
        delete b

        title = _rcget("tbl_group.g_descr",1)
        g_uri = _rcget("tbl_group.g_uri",1)
        p_name = _rcget("cgi.tw.page.url")
        page_dir = _rcget("tw_gstem")

        if (_rcget("cgi.page") != _NULL)
            p_name = p_name "/" _rcget("cgi.page")

        # Common prefix of the channel link and of every relative item
        # URL; it does not change while the feed is being built.
        base = ENVIRON["CSA_RPC_URI"] "/" ENVIRON["CSA_LANG"] "/" g_uri

        # p_uri, p_[m,v]time, p_name, p_modau, p_descr [,k_page]

        # RSS 1.0 response index.

        # x counts the records that made it into the index; the channel
        # header is emitted together with the first of them.
        x = 1
        for (i=1; i<= _TBL[0]; i++) {

            j = split(_TBL[i], a, "\t")

            # Pages with nil descriptions are excluded from RSS feeds.
            if (a[5] ~ /^ *- *$/) continue

            # Absolute http(s) URIs are used verbatim, anything else is
            # taken as relative to the group URL.
            if (a[1] ~ /^https?:\/\//) url = a[1]
            else url = base "/" a[1]

            if (x++==1) {
                _mrwresponse(_MRW,"channel","",base "/" p_name,\
                    "","","title",title)

                # make language strictly ISO-639, as described at this link:
                # http://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes

                value = tolower(ENVIRON["CSA_LANG"])
                gsub(/_/,"-",value)

                _mrwresponse(_MRW,"","","","","","link",base,b,b,1)
                _mrwresponse(_MRW,"","","","","","dc:language",value)

                # Optional channel metadata, taken from the environment.
                if (ENVIRON["TNS_RSS_CREATOR"] != _NULL)
                    _mrwresponse(_MRW,"","","","","","dc:creator",\
                        ENVIRON["TNS_RSS_CREATOR"])
                if (ENVIRON["TNS_RSS_RIGHTS"] != _NULL)
                    _mrwresponse(_MRW,"","","","","","dc:rights",\
                        ENVIRON["TNS_RSS_RIGHTS"])
                if (ENVIRON["TNS_RSS_SUBTITLE"] != _NULL)
                    _mrwresponse(_MRW,"","","","","","description",\
                        ENVIRON["TNS_RSS_SUBTITLE"])

                _mrwresponse(_MRW,"","","","","","dc:date",\
                    _rcget("CSA_TIME_ISO8601",1))
                _mrwresponse(_MRW,"","items","","rdf:Seq")
            }
            _mrwresponse(_MRW,"","","","","","rdf:li",url)
        }

        # NOTE(review): this runs even when no record opened the channel
        # above (x is still 1) -- confirm _mrwresponse() tolerates that.
        _mrwresponse(_MRW,".")          # close the channel index.

        # RSS 1.0 response body.

        # NOTE(review): pages skipped below because their body is empty
        # have already been listed as rdf:li entries in the index above.

        for (i=1; i<= _TBL[0]; i++) {

            j = split(_TBL[i], a, "\t")

            # Mangle according to http://www.w3.org/TR/NOTE-datetime .
            sub(/^[^,]+,/,_NULL,a[2]); sub(/ /,"T",a[2])

            # Make feed readers happy by appending a plausible time zone
            # if missing. This both for backward-compatibility with previous
            # TW versions and for those RSS views that use 'vtime' instead
            # of either 'mtime' or 'ctime', since 'vtime' is always in a
            # simplified local time format.

            if (a[2] !~ /[-+][:0-9]+$/) a[2] = a[2] _TIME[":z"]

            # Pages with nil descriptions are excluded from RSS feeds.
            if (a[5] ~ /^ *- *$/) continue

            if (a[1] ~ /^https?:\/\//) url = a[1]
            else url = base "/" a[1]

            if (a[6] == _NULL) {
                # No page key: the item carries only the short description.
                _mrwresponse(_MRW,"item","",url)
                value = a[3]
                sub(/\./,": ",value)    # improve subcat delimiter.
                _mrwresponse(_MRW,"","","","","","title",value)
                _mrwresponse(_MRW,"","","","","","link",url,b,b,1)
                _mrwresponse(_MRW,"","","","","","dc:creator",a[4])
                _mrwresponse(_MRW,"","","","","","dc:date",a[2])

                # Omit page descriptions if equal to page names,
                # regardless of the subcat trailer.
                if (a[5] != substr(a[3],index(a[3],".")+1))
                    _mrwresponse(_MRW,"","","","","","description",a[5])
            }
            else {
                page = page_dir "/" a[6] "+wki"

                # Note: newlines MUST be preserved, or line-sensitive
                # sections of the page will no longer work (the extra
                # leading newline should not matter).

                value = _NULL
                # The parentheses make sure the getline result, not the
                # file name, is what gets compared with 0; an unreadable
                # file (getline returns -1) simply yields an empty body.
                while ((getline tmp < page) > 0) value = value "\n" tmp
                close(page)

                if (value !~ /[a-zA-Z0-9]/) continue  # skip empty pages.

                # Note how the TW concept of "abstract" works: an editor can
                # select the portion of a page which will be used as the page
                # abstract by surrounding such portion by suitable application-
                # level wiki tags. This means that a page abstract isn't a
                # separate piece of information but it is simply a selected
                # part of the page body, that will be rendered ***in alternative
                # to the actual page body*** in tw-recent-pages and possibly
                # other static views. When a page is rendered for normal
                # display the abstract section is removed.

                sub(/.*\(::ab:\)/,"",value)
                sub(/\(:ab::\).*/,"",value)

                # Cut the text at the (:i:) tag and mark the truncation.
                # A richer replacement would probably break the feed's XML
                # well-formedness, so only the plain " ..." marker is used.

                sub(/\(:i:\).*/," ...",value)

                _mrwresponse(_MRW,"item","",url)
                tmp = a[3]
                sub(/\./,": ",tmp)      # improve subcat delimiter.
                _mrwresponse(_MRW,"","","","","","title",tmp)
                _mrwresponse(_MRW,"","","","","","link",url,b,b,1)
                _mrwresponse(_MRW,"","","","","","dc:creator",a[4])
                _mrwresponse(_MRW,"","","","","","dc:date",a[2])

                # Save on overhead if there's no RDFa to parse.
                if (value ~ /\(:v-/) {
                    if (_rcget("TNS_GROUP_MISC_PROP",4) == "rdfa")
                        value = _rdfacpi(value)
                    else value = _mfmtcpi(value)
                }

                # Page bodies may contain markup, so they need to be
                # enclosed in a CDATA section. A literal "]]>" inside the
                # body would end that section early, so it is split across
                # two adjacent CDATA sections first.

                gsub(/\]\]>/,"]]]]><![CDATA[>",value)
                value = "<![CDATA[" value "]]>"

                _mrwresponse(_MRW,"","","","","","description",value,b,b,1,"",1)
            }
        }
    }
}

# EOF