Module:Base64: Difference between revisions

From Celeste Wiki
Jump to navigation Jump to search
Content added Content deleted
No edit summary
(Gah)
Tag: Replaced
Line 6: Line 6:


local b='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -- You will need this for encoding/decoding
local b='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -- You will need this for encoding/decoding




--[[**************************************************************************]]
-- base64.lua
-- Copyright 2014 Ernest R. Ewert
--
-- This Lua module contains the implementation of a Lua base64 encode
-- and decode library.
--
-- The library exposes these methods.
--
-- Method Args
-- ----------- ----------------------------------------------
-- encode String in / out
-- decode String in / out
--
-- encode String, function(value) predicate
-- decode String, function(value) predicate
--
-- encode file, function(value) predicate
-- decode file, function(value) predicate
--
-- encode file, file
-- decode file, file
--
-- alpha alphabet, term char
--


--------------------------------------------------------------------------------
-- known_base64_alphabets
--
--
local known_base64_alphabets=
{
    -- Each entry describes one alphabet:
    --   _alpha  the 64 characters, in value order (index 1 encodes value 0)
    --   _strip  Lua pattern matching every character that must be removed
    --           from input before decoding
    --   _term   the tail padding character, or "" for no padding

    base64= -- RFC 2045 (Ignores max line length restrictions)
    {
        _alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
        _strip="[^%a%d%+%/%=]",
        _term="="
    },

    base64noterm= -- RFC 2045 alphabet, written and read without tail padding
    {
        _alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
        _strip="[^%a%d%+%/]",
        _term=""
    },

    base64url= -- RFC 4648 'base64url'
    {
        _alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
        -- Only characters of this alphabet may survive sanitizing. '+' is
        -- not part of it and, because _term is empty, neither is '='; letting
        -- either through would index the decode tables with an unknown byte.
        _strip="[^%a%d%-_]",
        _term=""
    },
}
local c_alpha=known_base64_alphabets.base64
local pattern_strip

--[[**************************************************************************]]
--[[****************************** Encoding **********************************]]
--[[**************************************************************************]]


-- Precomputed tables (compromise using more memory for speed)
local b64e -- 6 bit patterns to ANSI 'char' values
local b64e_a -- ready to use
local b64e_a2 -- byte addend
local b64e_b1 -- byte addend
local b64e_b2 -- byte addend
local b64e_c1 -- byte addend
local b64e_c -- ready to use


-- Tail padding values
-- Padding appended to the final encoded group. encode_tail64 indexes this
-- table with the number of leftover input bytes (1 or 2), and the decoders
-- compare against entry [2] to recognise a padded tail.
-- set_and_get_alphabet overwrites both entries to match the active
-- alphabet's terminator (both become "" when there is none).
local tail_padd64=
{
"==", -- [1]: one leftover input byte -> two pad characters
"=" -- [2]: two leftover input bytes -> one pad character
}


--------------------------------------------------------------------------------
-- e64
--
-- Helper function to convert three eight bit values into four encoded
-- 6 (significant) bit values.
--
-- 7 0 7 0 7 0
-- e64(a a a a a a a a,b b b b b b b b,c c c c c c c c)
-- | | | |
-- return [ a a a a a a]| | |
-- [ a a b b b b]| |
-- [ b b b b c c]|
-- [ c c c c c c]
--
local function e64( a, b, c )
    -- The first and last output characters each depend on one input byte
    -- only, so they are read directly from the precomputed tables. The two
    -- middle characters straddle a byte boundary: the partial 6 bit values
    -- are added together and then mapped through the alphabet table.
    local first  = b64e_a[a]
    local second = b64e[ b64e_a2[a] + b64e_b1[b] ]
    local third  = b64e[ b64e_b2[b] + b64e_c1[c] ]
    local fourth = b64e_c[c]

    return first, second, third, fourth
end


--------------------------------------------------------------------------------
-- encode_tail64
--
-- Send a tail pad value to the output predicate provided.
--
local function encode_tail64( out, x, y )
    -- No leftover bytes: the input length was a multiple of three and
    -- there is nothing to pad.
    if x == nil then
        return
    end

    -- One leftover byte unless a second one was supplied.
    local leftover = 1
    local second = 0
    if y ~= nil then
        leftover = 2
        second = y
    end

    -- Encode as if a full three byte group were present, using zero for
    -- the missing byte(s). The fourth encoded value is never needed.
    local c1, c2, c3 = e64( x, second, 0 )

    -- One leftover byte yields two encoded characters, two leftover bytes
    -- yield three.
    local tail
    if leftover == 2 then
        tail = string.char( c1, c2 ) .. string.char( c3 )
    else
        tail = string.char( c1, c2 )
    end

    -- Hand over the final group together with its padding.
    out( tail .. tail_padd64[leftover] )
end


--------------------------------------------------------------------------------
-- encode64_io_iterator
--
-- Create an io input iterator to read an input file and split values for
-- proper encoding.
--
local function encode64_io_iterator(file)
    -- Builds the iterator object used by encode64_with_ii to pull raw bytes
    -- from an open file.
    --
    --   file     an open file handle (io.type(file) == "file")
    --   returns  a table with two functions:
    --              begin()  returns an iterator yielding three byte values
    --                       per call, or nil once fewer than three remain
    --              tail()   returns the one or two byte values left over
    --                       after the iterator finished (nothing if none)

    assert( io.type(file) == "file", "argument must be readable file handle" )
    assert( file.read ~= nil, "argument must be readable file handle" )

    local ii = { } -- Table for the input iterator

    -- Most recent chunk read from the file. It is shared between begin()
    -- and tail() and kept local to this iterator so that nothing leaks into
    -- the global table and independent iterators cannot disturb each other.
    local chunk

    setmetatable(ii,{ __tostring=function() return "base64.io_iterator" end})

    function ii.begin()
        local sb = string.byte

        return function()
            chunk = file:read(3)
            if chunk ~= nil and #chunk == 3 then
                return sb(chunk,1,3)
            end
            -- Short read or end of file: stop iterating. A short chunk
            -- stays in `chunk` for tail() to pick up.
            return nil
        end
    end

    -- Keeping the leftover handling out of the iterator means the main
    -- loop does not have to test every group for end of file.
    function ii.tail()
        if chunk ~= nil then return chunk:byte(1,2) end
    end

    return ii
end


--------------------------------------------------------------------------------
-- encode64_with_ii
--
-- Convert the value provided by an encode iterator that provides a begin
-- method, a tail method, and an iterator that returns three bytes for
-- each call until at the end. The tail method should return either 1 or 2
-- tail bytes (for source values that are not evenly divisible by three).
--
local function encode64_with_ii( ii, out )
    local to_chars = string.char

    -- Every full three byte group becomes one four character string.
    for first, second, third in ii.begin() do
        local encoded = to_chars( e64( first, second, third ) )
        out( encoded )
    end

    -- Whatever the iterator could not deliver as a full group (zero, one
    -- or two bytes) is padded and emitted last.
    local x, y = ii.tail()
    encode_tail64( out, x, y )
end


--------------------------------------------------------------------------------
-- encode64_with_predicate
--
-- Implements the basic raw data --> base64 conversion. Each three byte
-- sequence in the input string is converted to the encoded string and
-- given to the predicate provided in 4 output byte chunks. This method
-- is slightly faster for traversing existing strings in memory.
--
local function encode64_with_predicate( raw, out )
    -- Encodes the string `raw` and passes the result to `out` piece by
    -- piece: one call per complete three byte group (a four character
    -- string each), followed by at most one call for the padded tail.
    --
    --   raw  string to encode
    --   out  function(chunk) receiving each encoded piece
    local rem=#raw%3 -- bytes left over after the last full group
    local len=#raw-rem -- length covered by full three byte groups
    local sb=string.byte
    local sc=string.char

    for i=1,len,3 do
        -- Bytes i .. i+2 are the three inputs of e64; its four results are
        -- turned into one string by string.char.
        out( sc( e64( sb( raw, i, i+2 ) ) ) )
    end

    -- A length that is not a multiple of three leaves one or two bytes
    -- that need tail padding. string.byte returns exactly those bytes.
    if rem > 0 then
        encode_tail64( out, sb( raw, len+1, #raw ) )
    end
end


--------------------------------------------------------------------------------
-- encode64_tostring
--
-- Convenience method that accepts a string value and returns the
-- encoded version of that string.
--
local function encode64_tostring(raw)
    -- Collect every encoded piece in a table and join them once at the
    -- end, which avoids repeated string concatenation.
    local pieces = {}

    encode64_with_predicate( raw, function(piece)
        table.insert( pieces, piece )
    end )

    return table.concat( pieces )
end


--[[**************************************************************************]]
--[[****************************** Decoding **********************************]]
--[[**************************************************************************]]


-- Precomputed tables (compromise using more memory for speed)
local b64d -- ANSI 'char' to right shifted bit pattern
local b64d_a1 -- byte addend
local b64d_a2 -- byte addend
local b64d_b1 -- byte addend
local b64d_b2 -- byte addend
local b64d_c1 -- byte addend
local b64d_z -- zero


--------------------------------------------------------------------------------
-- d64
--
-- Helper function to convert four six bit values into three full eight
-- bit values. Input values are the integer expression of the six bit value
-- encoded in the original base64 encoded string.
--
-- d64( _ _1 1 1 1 1 1,
-- | _ _ 2 2 2 2 2 2,
-- | | _ _ 3 3 3 3 3 3,
-- | | | _ _ 4 4 4 4 4 4)
-- | | | |
-- return ', [1 1 1 1 1 1 2 2] | |
-- ', [2 2 2 2 3 3 3 3] |
-- ' [3 3 4 4 4 4 4 4]
--
local function d64( b1, b2, b3, b4 )
    -- Each output byte is assembled from two adjacent encoded characters.
    -- The lookup tables already hold the shifted partial values, and since
    -- the two parts never share a bit they can simply be added.
    local first  = b64d_a1[b1] + b64d_a2[b2]
    local second = b64d_b1[b2] + b64d_b2[b3]
    local third  = b64d_c1[b3] + b64d[b4]

    return first, second, third
end


--------------------------------------------------------------------------------
-- decode_tail64
--
-- Send the end of stream bytes that didn't get decoded via the main loop.
--
local function decode_tail64( out, e1, e2 ,e3, e4 )
    local pad = tail_padd64[2]
    local pad_byte = pad:byte() -- nil when the alphabet has no terminator

    -- With a terminator configured, only a group whose fourth character is
    -- the terminator is treated as a tail; anything else is ignored.
    if pad ~= "" and e4 ~= pad_byte then
        return
    end

    -- The third character carries data only when it is present and is not
    -- itself padding; otherwise the zero value stands in for it.
    local has_third = e3 ~= nil and e3 ~= pad_byte
    local n3 = b64d_z
    if has_third then
        n3 = e3
    end

    -- Unpack the six bit values into eight bit values.
    local b1, b2 = d64( e1, e2, n3, b64d_z )

    -- Three data characters give two bytes, two data characters give one.
    if has_third then
        out( string.char( b1, b2 ) )
    else
        out( string.char( b1 ) )
    end
end


--------------------------------------------------------------------------------
-- decode64_io_iterator
--
-- Create an io input iterator to read an input file and split values for
-- proper decoding.
--
local function decode64_io_iterator( file )
    -- Builds the iterator object used by decode64_with_ii to read encoded
    -- text from an open file.
    --
    --   file     file handle to read from
    --   returns  a table whose begin() function returns an iterator; each
    --            call of that iterator returns the next decoded string, or
    --            nil once the input is exhausted

    local ii = { }

    -- Coroutine body: reads the file in blocks, strips characters that do
    -- not belong to the alphabet and yields decoded strings.
    local function enumerate( file )
        local sc=string.char
        local sb=string.byte
        local ll="" -- sanitized characters carried over to the next block
        local len
        local yield = coroutine.yield

        -- Fixed size blocks are read instead of lines so that a file
        -- without line breaks cannot cause unbounded memory use.
        for cl in file:lines(2048) do
            -- Prepend the carry-over to the sanitized block. gsub's second
            -- result (the match count) is dropped by the concatenation.
            cl = ll .. cl:gsub(pattern_strip,"")

            -- Decode all complete groups except the final one, which is
            -- held back because it may turn out to be the padded tail.
            len = (#cl-4)-(#cl%4)

            for i=1,len,4 do
                -- characters i .. i+3 are the four inputs of d64
                yield( sc( d64( sb( cl, i, i+3 ) ) ) )
            end

            ll = cl:sub( len +1, #cl )
        end

        local l = #ll

        -- A held back group that does not end in padding is ordinary data.
        if l >= 4 and ll:sub(-1) ~= tail_padd64[2] then
            yield( sc( d64( sb( ll, 1, 4 ) ) ) )
            l=l-4
        end

        -- Anything still left is the tail.
        if l > 0 then

            local e1,e2,e3,e4 = ll:byte( 0 - l, -1 )

            if e1 ~= nil then
                decode_tail64( function(s) yield( s ) end, e1, e2, e3, e4 )
            end
        end

    end

    -- Returns an iterator backed by a coroutine running enumerate. Each
    -- yield hands one decoded string to the loop driving the iteration.
    function ii.begin()
        local co = coroutine.create( function() enumerate(file) end )

        return function()
            local code,res = coroutine.resume(co)
            -- On failure `res` is the error raised inside the coroutine;
            -- pass it on instead of a bare "assertion failed!".
            assert( code, res )
            return res
        end
    end

    return ii
end


--------------------------------------------------------------------------------
-- decode64_with_ii
--
-- Convert the value provided by a decode iterator that provides a begin
-- method, a tail method, and an iterator that returns four (usable!) bytes
-- for each call until at the end.
--
local function decode64_with_ii( ii, out )
    -- Pull decoded strings from the iterator until it signals the end by
    -- returning nil, forwarding each one to the output predicate.
    for decoded in ii.begin() do
        out( decoded )
    end
end


--------------------------------------------------------------------------------
-- decode64_with_predicate
--
-- Decode an entire base64 encoded string in memory using the predicate for
-- output.
--
local function decode64_with_predicate( raw, out )
    -- Decodes the base64 string `raw` and passes the result to `out`: one
    -- call per complete four character group (up to three bytes each),
    -- followed by at most one call for the tail.
    --
    --   raw  encoded string; characters outside the active alphabet are
    --        removed first (a deviation from strict implementations, which
    --        reject such input)
    --   out  function(chunk) receiving each decoded piece
    local san = raw:gsub(pattern_strip,"")
    local len = #san-#san%4
    local rem = #san-len
    local sc = string.char
    local sb = string.byte
    local pad = tail_padd64[2]

    -- A padded final group is handled by decode_tail64, not the main loop.
    -- The check is skipped when the alphabet has no terminator: the last
    -- character of an empty input is "" and would otherwise be mistaken
    -- for padding, sending an empty tail into the decoder.
    if pad ~= "" and san:sub(-1,-1) == pad then
        rem = rem + 4
        len = len - 4
    end

    for i=1,len,4 do
        -- characters i .. i+3 are the four inputs of d64
        out( sc( d64( sb( san, i, i+3 ) ) ) )
    end

    if rem > 0 then
        decode_tail64( out, sb( san, 0-rem, -1 ) )
    end
end


--------------------------------------------------------------------------------
-- decode64_tostring
--
-- Takes a string that is encoded in base64 and returns the decoded value in
-- a new string.
--
local function decode64_tostring( raw )
    -- Gather the decoded pieces in a table and join them in one step.
    local pieces = {}

    decode64_with_predicate( raw, function(piece)
        table.insert( pieces, piece )
    end )

    return table.concat( pieces )
end


--------------------------------------------------------------------------------
-- set_and_get_alphabet
--
-- Sets and returns the encode / decode alphabet.
--
--
local function set_and_get_alphabet(alpha,term)
-- Selects the active alphabet, rebuilds every encode/decode lookup table
-- for it, and returns the alphabet string and terminator now in effect.
--
-- alpha  key of known_base64_alphabets, or a 64 character custom alphabet
--        string; when nil nothing is changed and the current values are
--        simply returned
-- term   terminator for a custom alphabet ("" or a single character);
--        only read when alpha is not a known key
--
-- NOTE(review): for a custom alphabet with term == nil the `#c_alpha._term`
-- assertion below raises an index error -- confirm callers always pass term.

if alpha ~= nil then
-- Characters that are special in Lua patterns, mapped to their escaped
-- form. '%' itself is handled separately further down.
local magic=
{
-- ["%"]="%%",
[" "]="% ",
["^"]="%^",
["$"]="%$",
["("]="%(",
[")"]="%)",
["."]="%.",
["["]="%[",
["]"]="%]",
["*"]="%*",
["+"]="%+",
["-"]="%-",
["?"]="%?",
}

-- Use a registered alphabet when alpha names one, otherwise treat alpha
-- as the alphabet string itself.
c_alpha=known_base64_alphabets[alpha]
if c_alpha == nil then
c_alpha={ _alpha=alpha, _term=term }
end

assert( #c_alpha._alpha == 64, "The alphabet ~must~ be 64 unique values." )
assert( #c_alpha._term <= 1, "Specify zero or one termination character.")

b64d={} -- Decode table alpha -> right shifted int values
b64e={} -- Encode table 0-63 (6 bits) -> char table
-- s is rebuilt from the individual bytes and compared with the alphabet
-- in the integrity assertion below.
local s=""
for i = 1,64 do
local byte = c_alpha._alpha:byte(i)
local str = string.char(byte)
b64e[i-1]=byte
assert( b64d[byte] == nil, "Duplicate value '"..str.."'" )
b64d[byte]=i-1
s=s..str
end


local ext --Alias for extraction routine that avoids extra table lookups

-- ext(n, field, width) returns `width` bits of n starting at bit `field`.
-- bit32.extract is used when available; otherwise the same operation is
-- built from the `bit` library's rshift and band.
if bit32 then
ext = bit32.extract -- slight speed, vast visual (IMO)
elseif bit then
local band = bit.band
local rshift = bit.rshift
ext =
function(n, field, width)
width = width or 1
return band(rshift(n, field), 2^width-1)
end
else
error("Neither Lua 5.2 bit32 nor LuaJit bit library found!")
end

-- preload encode lookup tables
b64e_a = {}
b64e_a2 = {}
b64e_b1 = {}
b64e_b2 = {}
b64e_c1 = {}
b64e_c = {}

-- One entry per possible input byte value. b64e_a and b64e_c hold finished
-- alphabet characters; the other tables hold the partial 6 bit values that
-- e64 adds together before the alphabet lookup.
for f = 0,255 do
b64e_a [f]=b64e[ext(f,2,6)]
b64e_a2 [f]=ext(f,0,2)*16
b64e_b1 [f]=ext(f,4,4)
b64e_b2 [f]=ext(f,0,4)*4
b64e_c1 [f]=ext(f,6,2)
b64e_c [f]=b64e[ext(f,0,6)]
end

-- preload decode lookup tables
b64d_a1 = {}
b64d_a2 = {}
b64d_b1 = {}
b64d_b2 = {}
b64d_c1 = {}
-- The alphabet character that encodes value 0; used as the stand-in for
-- missing characters when decoding a tail.
b64d_z = b64e[0]

for k,v in pairs(b64d) do
-- Each comment shows the rough C expression that would be used to
-- generate the returned triple.
--
b64d_a1 [k] = v*4 -- ([b1] ) << 2
b64d_a2 [k] = math.floor( v / 16 ) -- ([b2] & 0x30) >> 4
b64d_b1 [k] = ext( v, 0, 4 ) * 16 -- ([b2] & 0x0F) << 4
b64d_b2 [k] = math.floor( v / 4 ) -- ([b3] & 0x3c) >> 2
b64d_c1 [k] = ext( v, 0, 2 ) * 64 -- ([b3] & 0x03) << 6
end

-- Bring the shared tail padding strings in line with the terminator.
if c_alpha._term ~= "" then
tail_padd64[1]=string.char(c_alpha._term:byte(),c_alpha._term:byte())
tail_padd64[2]=string.char(c_alpha._term:byte())
else
tail_padd64[1]=""
tail_padd64[2]=""
end

-- Terminator in a form that is safe to embed in a Lua pattern.
local esc_term

if magic[c_alpha._term] ~= nil then
esc_term=c_alpha._term:gsub(magic[c_alpha._term],function (s) return magic[s] end)
elseif c_alpha._term == "%" then
esc_term = "%%"
else
esc_term=c_alpha._term
end

-- Alphabets without a predefined strip pattern get one generated: a
-- negated set of the escaped alphabet plus the escaped terminator. Any
-- '%' is swapped for a placeholder while the other characters are escaped
-- and turned into "%%" afterwards.
if not c_alpha._strip then
local p=s:gsub("%%",function (s) return "__unique__" end)
for k,v in pairs(magic)
do
p=p:gsub(v,function (s) return magic[s] end )
end
local mr=p:gsub("__unique__",function() return "%%" end)

c_alpha._strip = string.format("[^%s%s]",mr,esc_term)
end

assert( c_alpha._strip )

pattern_strip = c_alpha._strip

-- Count the distinct bytes registered in the decode table.
local c =0 for i in pairs(b64d) do c=c+1 end

assert( c_alpha._alpha == s, "Integrity error." )
assert( c == 64, "The alphabet must be 64 unique values." )
if esc_term ~= "" then
assert( not c_alpha._alpha:find(esc_term), "Tail characters must not exist in alphabet." )
end

-- Register a custom alphabet under its own alphabet string so that a later
-- call with the same string finds it in known_base64_alphabets.
if known_base64_alphabets[alpha] == nil then
known_base64_alphabets[alpha]=c_alpha
end
end

return c_alpha._alpha,c_alpha._term
end


--------------------------------------------------------------------------------
-- encode64
--
-- Entry point mode selector.
--
--
local function encode64(i,o)
    -- An open file given as the output is wrapped in a predicate that
    -- writes every encoded piece to it.
    if o ~= nil and io.type(o) == "file" then
        local sink = o
        o = function(s) sink:write(s) end
    end

    -- String source: stream to the predicate when one is given, otherwise
    -- return the encoded string.
    if type(i) == "string" then
        if type(o) == "function" then
            return encode64_with_predicate( i, o )
        end
        assert( o == nil, "unsupported request")
        return encode64_tostring( i, o )
    end

    -- File source: always needs somewhere to send the output.
    if io.type(i) == "file" then
        assert( type(o) == "function", "file source requires output predicate")
        return encode64_with_ii( encode64_io_iterator(i), o )
    end

    assert( false, "unsupported mode" )
end


--------------------------------------------------------------------------------
-- decode64
--
-- Entry point mode selector.
--
--
local function decode64(i,o)
    -- An open file given as the output is wrapped in a predicate that
    -- writes every decoded piece to it.
    if o ~= nil and io.type(o) == "file" then
        local sink = o
        o = function(s) sink:write(s) end
    end

    -- String source: stream to the predicate when one is given, otherwise
    -- return the decoded string.
    if type(i) == "string" then
        if type(o) == "function" then
            return decode64_with_predicate( i, o )
        end
        assert( o == nil, "unsupported request")
        return decode64_tostring( i, o )
    end

    -- File source: always needs somewhere to send the output.
    if io.type(i) == "file" then
        assert( type(o) == "function", "file source requires output predicate")
        return decode64_with_ii( decode64_io_iterator(i), o )
    end

    assert( false, "unsupported mode" )
end

set_and_get_alphabet("base64")

--[[**************************************************************************]]
--[[****************************** Module **********************************]]
--[[**************************************************************************]]




function p.base64(frame)
function p.base64(frame)
Line 743: Line 19:
encode = args.encode
encode = args.encode
yesno(encode, false)
yesno(encode, false)




-- encoding
local function enc(data)
return ((data:gsub('.', function(x)
local r,b='',x:byte()
for i=8,1,-1 do r=r..(b%2^i-b%2^(i-1)>0 and '1' or '0') end
return r;
end)..'0000'):gsub('%d%d%d?%d?%d?%d?', function(x)
if (#x < 6) then return '' end
local c=0
for i=1,6 do c=c+(x:sub(i,i)=='1' and 2^(6-i) or 0) end
return b:sub(c+1,c+1)
end)..({ '', '==', '=' })[#data%3+1])
end

-- decoding
local function dec(data)
data = string.gsub(data, '[^'..b..'=]', '')
return (data:gsub('.', function(x)
if (x == '=') then return '' end
local r,f='',(b:find(x)-1)
for i=6,1,-1 do r=r..(f%2^i-f%2^(i-1)>0 and '1' or '0') end
return r;
end):gsub('%d%d%d?%d?%d?%d?%d?%d?', function(x)
if (#x ~= 8) then return '' end
local c=0
for i=1,8 do c=c+(x:sub(i,i)=='1' and 2^(8-i) or 0) end
return string.char(c)
end))
end


if (encode == true) then
if (encode == true) then
base64.encode(data)
enc(data)
return
return r
{
encode = encode64,
decode = decode64,
alpha = set_and_get_alphabet,
}
else
else
base64.decode(data)
dec(data)
return
return r
{
encode = encode64,
decode = decode64,
alpha = set_and_get_alphabet,
}
end
end
end
end

return p
return p

Revision as of 14:59, 14 January 2021

Documentation for this module may be created at Module:Base64/doc

-- Made by [[User:Celeste]] - Licensed under GPLv3

-- Table of functions exported by this module; it is returned at the end of
-- the file.
local p = {}

-- Module:Yesno is called by p.base64 on its `encode` argument.
local yesno = require('Module:Yesno') -- invoke Yesno

-- The 64 character base64 alphabet. A character's 1-based position minus one
-- is its 6 bit value; the encoder indexes into this string and the decoder
-- searches it.
local b='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -- You will need this for encoding/decoding

-- Encodes a raw string as base64 using the alphabet in `b`, padded with '='.
local function enc(data)
	return ((data:gsub('.', function(x)
		-- expand the byte into eight '0'/'1' characters, high bit first
		local r,byte='',x:byte()
		for i=8,1,-1 do r=r..(byte%2^i-byte%2^(i-1)>0 and '1' or '0') end
		return r
	end)..'0000'):gsub('%d%d%d?%d?%d?%d?', function(x)
		-- regroup the bit string six bits at a time; a short final group
		-- consists only of the filler zeros and is dropped
		if (#x < 6) then return '' end
		local c=0
		for i=1,6 do c=c+(x:sub(i,i)=='1' and 2^(6-i) or 0) end
		return b:sub(c+1,c+1)
	end)..({ '', '==', '=' })[#data%3+1])
end

-- Decodes a base64 string; characters outside the alphabet and '=' are
-- removed before decoding.
local function dec(data)
	data = string.gsub(data, '[^'..b..'=]', '')
	return (data:gsub('.', function(x)
		if (x == '=') then return '' end
		-- plain (non-pattern) search, so '+' is looked up literally
		local r,f='',(b:find(x, 1, true)-1)
		for i=6,1,-1 do r=r..(f%2^i-f%2^(i-1)>0 and '1' or '0') end
		return r
	end):gsub('%d%d%d?%d?%d?%d?%d?%d?', function(x)
		-- regroup the bit string eight bits at a time; leftover bits that
		-- do not fill a byte are padding and are dropped
		if (#x ~= 8) then return '' end
		local c=0
		for i=1,8 do c=c+(x:sub(i,i)=='1' and 2^(8-i) or 0) end
		return string.char(c)
	end))
end

-- Entry point: base64-encodes or -decodes the `data` argument.
--
-- frame  either the current frame object (when invoked from wikitext), whose
--        `args` table is used, or a plain table of arguments (when called
--        from another Lua module)
--
-- Arguments read:
--   data    the text to convert (required)
--   encode  passed through yesno(); a true result selects encoding, any
--           other result (including a missing argument) selects decoding
--
-- Returns the converted string.
function p.base64(frame)
	local args
	if frame == mw.getCurrentFrame() then
		args = frame.args
	else
		args = frame
	end

	local data = args.data
	if data == nil then
		error("Module:Base64: the 'data' argument is required")
	end
	data = tostring(data)

	-- Arguments coming from wikitext are strings, so the flag has to be
	-- interpreted by yesno and its result used; comparing the raw argument
	-- with `true` can never succeed for them.
	if yesno(args.encode, false) then
		return enc(data)
	end
	return dec(data)
end

return p