-- neturl.lua - a robust url parser and builder -- -- Bertrand Mansion, 2011-2013; License MIT -- @module neturl -- @alias M local M = {} M.version = "0.9.0" --- url options -- separator is set to `&` by default but could be anything like `&` or `;` -- @todo Add an option to limit the size of the argument table M.options = { separator = '&' } --- list of known and common scheme ports -- as documented in IANA URI scheme list M.services = { acap = 674, cap = 1026, dict = 2628, ftp = 21, gopher = 70, http = 80, https = 443, iax = 4569, icap = 1344, imap = 143, ipp = 631, ldap = 389, mtqp = 1038, mupdate = 3905, news = 2009, nfs = 2049, nntp = 119, rtsp = 554, sip = 5060, snmp = 161, telnet = 23, tftp = 69, vemmi = 575, afs = 1483, jms = 5673, rsync = 873, prospero = 191, videotex = 516 } local legal = { ["-"] = true, ["_"] = true, ["."] = true, ["!"] = true, ["~"] = true, ["*"] = true, ["'"] = true, ["("] = true, [")"] = true, [":"] = true, ["@"] = true, ["&"] = true, ["="] = true, ["+"] = true, ["$"] = true, [","] = true, [";"] = true -- can be used for parameters in path } local function decode(str) local str = str:gsub('+', ' ') return (str:gsub("%%(%x%x)", function(c) return string.char(tonumber(c, 16)) end)) end local function encode(str) return (str:gsub("([^A-Za-z0-9%_%.%-%~])", function(v) return string.upper(string.format("%%%02x", string.byte(v))) end)) end -- for query values, prefer + instead of %20 for spaces local function encodeValue(str) local str = encode(str) return str:gsub('%%20', '+') end local function encodeSegment(s) local legalEncode = function(c) if legal[c] then return c end return encode(c) end return s:gsub('([^a-zA-Z0-9])', legalEncode) end --- builds the url -- @return a string representing the built url function M:build() local url = '' if self.path then local path = self.path path:gsub("([^/]+)", function (s) return encodeSegment(s) end) url = url .. tostring(path) end if self.query then local qstring = tostring(self.query) if qstring ~= "" then url = url .. '?' .. qstring end end if self.host then local authority = self.host if self.port and self.scheme and M.services[self.scheme] ~= self.port then authority = authority .. ':' .. self.port end local userinfo if self.user and self.user ~= "" then userinfo = self.user if self.password then userinfo = userinfo .. ':' .. self.password end end if userinfo and userinfo ~= "" then authority = userinfo .. '@' .. authority end if authority then if url ~= "" then url = '//' .. authority .. '/' .. url:gsub('^/+', '') else url = '//' .. authority end end end if self.scheme then url = self.scheme .. ':' .. url end if self.fragment then url = url .. '#' .. self.fragment end return url end --- builds the querystring -- @param tab The key/value parameters -- @param sep The separator to use (optional) -- @param key The parent key if the value is multi-dimensional (optional) -- @return a string representing the built querystring function M.buildQuery(tab, sep, key) local query = {} if not sep then sep = M.options.separator or '&' end local keys = {} for k in pairs(tab) do keys[#keys+1] = k end table.sort(keys) for _,name in ipairs(keys) do local value = tab[name] name = encode(tostring(name)) if key then name = string.format('%s[%s]', tostring(key), tostring(name)) end if type(value) == 'table' then query[#query+1] = M.buildQuery(value, sep, name) else local value = encodeValue(tostring(value)) if value ~= "" then query[#query+1] = string.format('%s=%s', name, value) else query[#query+1] = name end end end return table.concat(query, sep) end --- Parses the querystring to a table -- This function can parse multidimensional pairs and is mostly compatible -- with PHP usage of brackets in key names like ?param[key]=value -- @param str The querystring to parse -- @param sep The separator between key/value pairs, defaults to `&` -- @todo limit the max number of parameters with M.options.max_parameters -- @return a table representing the query key/value pairs function M.parseQuery(str, sep) if not sep then sep = M.options.separator or '&' end local values = {} for key,val in str:gmatch(string.format('([^%q=]+)(=*[^%q=]*)', sep, sep)) do local key = decode(key) local keys = {} key = key:gsub('%[([^%]]*)%]', function(v) -- extract keys between balanced brackets if string.find(v, "^-?%d+$") then v = tonumber(v) else v = decode(v) end table.insert(keys, v) return "=" end) key = key:gsub('=+.*$', "") key = key:gsub('%s', "_") -- remove spaces in parameter name val = val:gsub('^=+', "") if not values[key] then values[key] = {} end if #keys > 0 and type(values[key]) ~= 'table' then values[key] = {} elseif #keys == 0 and type(values[key]) == 'table' then values[key] = decode(val) end local t = values[key] for i,k in ipairs(keys) do if type(t) ~= 'table' then t = {} end if k == "" then k = #t+1 end if not t[k] then t[k] = {} end if i == #keys then t[k] = decode(val) end t = t[k] end end setmetatable(values, { __tostring = M.buildQuery }) return values end --- set the url query -- @param query Can be a string to parse or a table of key/value pairs -- @return a table representing the query key/value pairs function M:setQuery(query) local query = query if type(query) == 'table' then query = M.buildQuery(query) end self.query = M.parseQuery(query) return query end --- set the authority part of the url -- The authority is parsed to find the user, password, port and host if available. -- @param authority The string representing the authority -- @return a string with what remains after the authority was parsed function M:setAuthority(authority) self.authority = authority self.port = nil self.host = nil self.userinfo = nil self.user = nil self.password = nil authority = authority:gsub('^([^@]*)@', function(v) self.userinfo = v return '' end) authority = authority:gsub("^%[[^%]]+%]", function(v) -- ipv6 self.host = v return '' end) authority = authority:gsub(':([^:]*)$', function(v) self.port = tonumber(v) return '' end) if authority ~= '' and not self.host then self.host = authority:lower() end if self.userinfo then local userinfo = self.userinfo userinfo = userinfo:gsub(':([^:]*)$', function(v) self.password = v return '' end) self.user = userinfo end return authority end --- Parse the url into the designated parts. -- Depending on the url, the following parts can be available: -- scheme, userinfo, user, password, authority, host, port, path, -- query, fragment -- @param url Url string -- @return a table with the different parts and a few other functions function M.parse(url) local comp = {} M.setAuthority(comp, "") M.setQuery(comp, "") local url = tostring(url or '') url = url:gsub('#(.*)$', function(v) comp.fragment = v return '' end) url =url:gsub('^([%w][%w%+%-%.]*)%:', function(v) comp.scheme = v:lower() return '' end) url = url:gsub('%?(.*)', function(v) M.setQuery(comp, v) return '' end) url = url:gsub('^//([^/]*)', function(v) M.setAuthority(comp, v) return '' end) comp.path = decode(url) setmetatable(comp, { __index = M, __tostring = M.build} ) return comp end --- removes dots and slashes in urls when possible -- This function will also remove multiple slashes -- @param path The string representing the path to clean -- @return a string of the path without unnecessary dots and segments function M.removeDotSegments(path) local fields = {} if string.len(path) == 0 then return "" end local startslash = false local endslash = false if string.sub(path, 1, 1) == "/" then startslash = true end if (string.len(path) > 1 or startslash == false) and string.sub(path, -1) == "/" then endslash = true end path:gsub('[^/]+', function(c) table.insert(fields, c) end) local new = {} local j = 0 for i,c in ipairs(fields) do if c == '..' then if j > 0 then j = j - 1 end elseif c ~= "." then j = j + 1 new[j] = c end end local ret = "" if #new > 0 and j > 0 then ret = table.concat(new, '/', 1, j) else ret = "" end if startslash then ret = '/'..ret end if endslash then ret = ret..'/' end return ret end local function absolutePath(base_path, relative_path) if string.sub(relative_path, 1, 1) == "/" then return '/' .. string.gsub(relative_path, '^[%./]+', '') end local path = base_path if relative_path ~= "" then path = '/'..path:gsub("[^/]*$", "") end path = path .. relative_path path = path:gsub("([^/]*%./)", function (s) if s ~= "./" then return s else return "" end end) path = string.gsub(path, "/%.$", "/") local reduced while reduced ~= path do reduced = path path = string.gsub(reduced, "([^/]*/%.%./)", function (s) if s ~= "../../" then return "" else return s end end) end path = string.gsub(path, "([^/]*/%.%.?)$", function (s) if s ~= "../.." then return "" else return s end end) local reduced while reduced ~= path do reduced = path path = string.gsub(reduced, '^/?%.%./', '') end return '/' .. path end --- builds a new url by using the one given as parameter and resolving paths -- @param other A string or a table representing a url -- @return a new url table function M:resolve(other) if type(self) == "string" then self = M.parse(self) end if type(other) == "string" then other = M.parse(other) end if other.scheme then return other else other.scheme = self.scheme if not other.authority or other.authority == "" then other:setAuthority(self.authority) if not other.path or other.path == "" then other.path = self.path local query = other.query if not query or not next(query) then other.query = self.query end else other.path = absolutePath(self.path, other.path) end end return other end end --- normalize a url path following some common normalization rules -- described on The URL normalization page of Wikipedia -- @return the normalized path function M:normalize() if type(self) == 'string' then self = M.parse(self) end if self.path then local path = self.path path = absolutePath(path, "") -- normalize multiple slashes path = string.gsub(path, "//+", "/") self.path = path end return self end return M