Module:Base64: Difference between revisions

From Celeste Wiki
Jump to navigation Jump to search
Content added Content deleted
No edit summary
No edit summary
 
(11 intermediate revisions by the same user not shown)
Line 6: Line 6:


local b='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -- You will need this for encoding/decoding
local b='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -- You will need this for encoding/decoding




--[[**************************************************************************]]
-- base64.lua
-- Copyright 2014 Ernest R. Ewert
--
-- This Lua module contains the implementation of a Lua base64 encode
-- and decode library.
--
-- The library exposes these methods.
--
-- Method Args
-- ----------- ----------------------------------------------
-- encode String in / out
-- decode String in / out
--
-- encode String, function(value) predicate
-- decode String, function(value) predicate
--
-- encode file, function(value) predicate
-- deocde file, function(value) predicate
--
-- encode file, file
-- deocde file, file
--
-- alpha alphabet, term char
--


--------------------------------------------------------------------------------
-- known_base64_alphabets
--
--
local known_base64_alphabets=
{
base64= -- RFC 2045 (Ignores max line length restrictions)
{
_alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
_strip="[^%a%d%+%/%=]",
_term="="
},

base64noterm= -- RFC 2045 (Ignores max line length restrictions)
{
_alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
_strip="[^%a%d%+%/]",
_term=""
},

base64url= -- RFC 4648 'base64url'
{
_alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
_strip="[^%a%d%+%-%_=]",
_term=""
},
}
local c_alpha=known_base64_alphabets.base64
local pattern_strip

--[[**************************************************************************]]
--[[****************************** Encoding **********************************]]
--[[**************************************************************************]]


-- Precomputed tables (compromise using more memory for speed)
local b64e -- 6 bit patterns to ANSI 'char' values
local b64e_a -- ready to use
local b64e_a2 -- byte addend
local b64e_b1 -- byte addend
local b64e_b2 -- byte addend
local b64e_c1 -- byte addend
local b64e_c -- ready to use


-- Tail padding values
local tail_padd64=
{
"==", -- two bytes modulo
"=" -- one byte modulo
}


--------------------------------------------------------------------------------
-- e64
--
-- Helper function to convert three eight bit values into four encoded
-- 6 (significant) bit values.
--
-- 7 0 7 0 7 0
-- e64(a a a a a a a a,b b b b b b b b,c c c c c c c c)
-- | | | |
-- return [ a a a a a a]| | |
-- [ a a b b b b]| |
-- [ b b b b c c]|
-- [ c c c c c c]
--
local function e64( a, b, c )
-- Return pre-calculated values for encoded value 1 and 4
-- Get the pre-calculated extractions for value 2 and 3, look them
-- up and return the proper value.
--
return b64e_a[a],
b64e[ b64e_a2[a]+b64e_b1[b] ],
b64e[ b64e_b2[b]+b64e_c1[c] ],
b64e_c[c]
end


--------------------------------------------------------------------------------
-- encode_tail64
--
-- Send a tail pad value to the output predicate provided.
--
local function encode_tail64( out, x, y )
-- If we have a number of input bytes that isn't exactly divisible
-- by 3 then we need to pad the tail
if x ~= nil then
local a,b,r = x,0,1

if y ~= nil then
r = 2
b = y
end

-- Encode three bytes of info, with the tail byte as zeros and
-- ignore any fourth encoded ASCII value. (We should NOT have a
-- forth byte at this point.)
local b1, b2, b3 = e64( a, b, 0 )

-- always add the first 2 six bit values to the res table
-- 1 remainder input byte needs 8 output bits
local tail_value = string.char( b1, b2 )

-- two remainder input bytes will need 18 output bits (2 as pad)
if r == 2 then
tail_value=tail_value..string.char( b3 )
end

-- send the last 4 byte sequence with appropriate tail padding
out( tail_value .. tail_padd64[r] )
end
end


--------------------------------------------------------------------------------
-- encode64_io_iterator
--
-- Create an io input iterator to read an input file and split values for
-- proper encoding.
--
local function encode64_io_iterator(file)

assert( io.type(file) == "file", "argument must be readable file handle" )
assert( file.read ~= nil, "argument must be readable file handle" )

local ii = { } -- Table for the input iterator

setmetatable(ii,{ __tostring=function() return "base64.io_iterator" end})

-- Begin returns an input read iterator
--
function ii.begin()
local sb = string.byte

-- The iterator returns three bytes from the file for encoding or nil
-- when the end of the file has been reached.
--
return function()
s = file:read(3)
if s ~= nil and #s == 3 then
return sb(s,1,3)
end
return nil
end
end

-- The tail method on the iterator allows the routines to run faster
-- because each sequence of bytes doesn't have to test for EOF.
--
function ii.tail()
-- If one or two "overflow" bytes exist, return those.
--
if s ~= nil then return s:byte(1,2) end
end

return ii
end


--------------------------------------------------------------------------------
-- encode64_with_ii
--
-- Convert the value provided by an encode iterator that provides a begin
-- method, a tail method, and an iterator that returns three bytes for
-- each call until at the end. The tail method should return either 1 or 2
-- tail bytes (for source values that are not evenly divisible by three).
--
local function encode64_with_ii( ii, out )
local sc=string.char

for a, b, c in ii.begin() do
out( sc( e64( a, b, c ) ) )
end

encode_tail64( out, ii.tail() )

end


--------------------------------------------------------------------------------
-- encode64_with_predicate
--
-- Implements the basic raw data --> base64 conversion. Each three byte
-- sequence in the input string is converted to the encoded string and
-- given to the predicate provided in 4 output byte chunks. This method
-- is slightly faster for traversing existing strings in memory.
--
local function encode64_with_predicate( raw, out )
local rem=#raw%3 -- remainder
local len=#raw-rem -- 3 byte input adjusted
local sb=string.byte -- Mostly notational (slight performance)
local sc=string.char -- Mostly notational (slight performance)

-- Main encode loop converts three input bytes to 4 base64 encoded
-- ACSII values and calls the predicate with the value.
for i=1,len,3 do
-- This really isn't intended as obfuscation. It is more about
-- loop optimization and removing temporaries.
--
out( sc( e64( sb( raw ,i , i+3 ) ) ) )
-- | | |
-- | | byte i to i + 3
-- | |
-- | returns 4 encoded values
-- |
-- creates a string with the 4 returned values
end

-- If we have a number of input bytes that isn't exactly divisible
-- by 3 then we need to pad the tail
if rem > 0 then
local x, y = sb( raw, len+1 )

if rem > 1 then
y = sb( raw, len+2 )
end

encode_tail64( out, x, y )
end
end


--------------------------------------------------------------------------------
-- encode64_tostring
--
-- Convenience method that accepts a string value and returns the
-- encoded version of that string.
--
local function encode64_tostring(raw)

local sb={} -- table to build string

local function collection_predicate(v)
sb[#sb+1]=v
end

-- Test with an 818K string in memory. Result is 1.1M of data.
--
-- lua_base64 base64 (gnu 8.21)
-- 202ms 54ms
-- 203ms 48ms
-- 204ms 50ms
-- 203ms 42ms
-- 205ms 46ms
--
encode64_with_predicate( raw, collection_predicate )

return table.concat(sb)
end


--[[**************************************************************************]]
--[[****************************** Decoding **********************************]]
--[[**************************************************************************]]


-- Precomputed tables (compromise using more memory for speed)
local b64d -- ANSI 'char' to right shifted bit pattern
local b64d_a1 -- byte addend
local b64d_a2 -- byte addend
local b64d_b1 -- byte addend
local b64d_b2 -- byte addend
local b64d_c1 -- byte addend
local b64d_z -- zero


--------------------------------------------------------------------------------
-- d64
--
-- Helper function to convert four six bit values into three full eight
-- bit values. Input values are the integer expression of the six bit value
-- encoded in the original base64 encoded string.
--
-- d64( _ _1 1 1 1 1 1,
-- | _ _ 2 2 2 2 2 2,
-- | | _ _ 3 3 3 3 3 3,
-- | | | _ _ 4 4 4 4 4 4)
-- | | | |
-- return ', [1 1 1 1 1 1 2 2] | |
-- ', [2 2 2 2 3 3 3 3] |
-- ' [3 3 4 4 4 4 4 4]
--
local function d64( b1, b2, b3, b4 )
-- We can get away with addition instead of anding the values together
-- because there are no overlapping bit patterns.
--
return
b64d_a1[b1] + b64d_a2[b2],
b64d_b1[b2] + b64d_b2[b3],
b64d_c1[b3] + b64d[b4]
end


--------------------------------------------------------------------------------
-- decode_tail64
--
-- Send the end of stream bytes that didn't get decoded via the main loop.
--
local function decode_tail64( out, e1, e2 ,e3, e4 )

if tail_padd64[2] == "" or e4 == tail_padd64[2]:byte() then
local n3 = b64d_z

if e3 ~= nil and e3 ~= tail_padd64[2]:byte() then
n3 = e3
end

-- Unpack the six bit values into the 8 bit values
local b1, b2 = d64( e1, e2, n3, b64d_z )

-- And add them to the res table
if e3 ~= nil and e3 ~= tail_padd64[2]:byte() then
out( string.char( b1, b2 ) )
else
out( string.char( b1 ) )
end
end
end


--------------------------------------------------------------------------------
-- decode64_io_iterator
--
-- Create an io input iterator to read an input file and split values for
-- proper decoding.
--
local function decode64_io_iterator( file )

local ii = { }

-- An enumeration coroutine that handles the reading of an input file
-- to break data into proper pieces for building the original string.
--
local function enummerate( file )
local sc=string.char
local sb=string.byte
local ll="" -- last line storage
local len
local yield = coroutine.yield

-- Read a "reasonable amount" of data into the line buffer. Line by
-- line is not used so that a file with no line breaks doesn't
-- cause an inordinate amount of memory usage.
--
for cl in file:lines(2048) do
-- Reset the current line to contain valid chars and any previous
-- "leftover" bytes from the previous read
--
cl = ll .. cl:gsub(pattern_strip,"")
-- | |
-- | Remove "Invalid" chars (white space etc)
-- |
-- Left over from last line
--
len = (#cl-4)-(#cl%4)

-- see the comments in decode64_with_predicate for a rundown of
-- the results of this loop (sans the coroutine)
for i=1,len,4 do
yield( sc( d64( sb( cl, i, i+4 ) ) ) )
end

ll = cl:sub( len +1, #cl )
end

local l = #ll

if l >= 4 and ll:sub(-1) ~= tail_padd64[2] then
yield( sc( d64( sb( ll, 1, 4 ) ) ) )
l=l-4
end

if l > 0 then

local e1,e2,e3,e4 = ll:byte( 0 - l, -1 )

if e1 ~= nil then
decode_tail64( function(s) yield( s ) end, e1, e2, e3, e4 )
end
end

end

-- Returns an input iterator that is implemented as a coroutine. Each
-- yield of the co-routine sends reconstructed bytes to the loop handling
-- the iteration.
--
function ii.begin()
local co = coroutine.create( function() enummerate(file) end )

return function()
local code,res = coroutine.resume(co)
assert(code == true)
return res
end
end

return ii
end


--------------------------------------------------------------------------------
-- decode64_with_ii
--
-- Convert the value provided by a decode iterator that provides a begin
-- method, a tail method, and an iterator that returns four (usable!) bytes
-- for each call until at the end.
--
local function decode64_with_ii( ii, out )

-- Uses the iterator to pull values. Each reconstructed string
-- is sent to the output predicate.
--
for l in ii.begin() do out( l ) end

end


--------------------------------------------------------------------------------
-- decode64_with_predicate
--
-- Decode an entire base64 encoded string in memory using the predicate for
-- output.
--
local function decode64_with_predicate( raw, out )
-- Sanitize the input to strip characters that are not in the alphabet.
--
-- Note: This is a deviation from strict implementations where "bad data"
-- in the input stream is unsupported.
--
local san = raw:gsub(pattern_strip,"")
local len = #san-#san%4
local rem = #san-len
local sc = string.char
local sb = string.byte

if san:sub(-1,-1) == tail_padd64[2] then
rem = rem + 4
len = len - 4
end

for i=1,len,4 do
out( sc( d64( sb( san, i, i+4 ) ) ) )
end

if rem > 0 then
decode_tail64( out, sb( san, 0-rem, -1 ) )
end
end


--------------------------------------------------------------------------------
-- decode64_tostring
--
-- Takes a string that is encoded in base64 and returns the decoded value in
-- a new string.
--
local function decode64_tostring( raw )

local sb={} -- table to build string

local function collection_predicate(v)
sb[#sb+1]=v
end

decode64_with_predicate( raw, collection_predicate )

return table.concat(sb)
end


--------------------------------------------------------------------------------
-- set_and_get_alphabet
--
-- Sets and returns the encode / decode alphabet.
--
--
local function set_and_get_alphabet(alpha,term)

if alpha ~= nil then
local magic=
{
-- ["%"]="%%",
[" "]="% ",
["^"]="%^",
["$"]="%$",
["("]="%(",
[")"]="%)",
["."]="%.",
["["]="%[",
["]"]="%]",
["*"]="%*",
["+"]="%+",
["-"]="%-",
["?"]="%?",
}

c_alpha=known_base64_alphabets[alpha]
if c_alpha == nil then
c_alpha={ _alpha=alpha, _term=term }
end

assert( #c_alpha._alpha == 64, "The alphabet ~must~ be 64 unique values." )
assert( #c_alpha._term <= 1, "Specify zero or one termination character.")

b64d={} -- Decode table alpha -> right shifted int values
b64e={} -- Encode table 0-63 (6 bits) -> char table
local s=""
for i = 1,64 do
local byte = c_alpha._alpha:byte(i)
local str = string.char(byte)
b64e[i-1]=byte
assert( b64d[byte] == nil, "Duplicate value '"..str.."'" )
b64d[byte]=i-1
s=s..str
end


local ext --Alias for extraction routine that avoids extra table lookups

if bit32 then
ext = bit32.extract -- slight speed, vast visual (IMO)
elseif bit then
local band = bit.band
local rshift = bit.rshift
ext =
function(n, field, width)
width = width or 1
return band(rshift(n, field), 2^width-1)
end
else
error("Neither Lua 5.2 bit32 nor LuaJit bit library found!")
end

-- preload encode lookup tables
b64e_a = {}
b64e_a2 = {}
b64e_b1 = {}
b64e_b2 = {}
b64e_c1 = {}
b64e_c = {}

for f = 0,255 do
b64e_a [f]=b64e[ext(f,2,6)]
b64e_a2 [f]=ext(f,0,2)*16
b64e_b1 [f]=ext(f,4,4)
b64e_b2 [f]=ext(f,0,4)*4
b64e_c1 [f]=ext(f,6,2)
b64e_c [f]=b64e[ext(f,0,6)]
end

-- preload decode lookup tables
b64d_a1 = {}
b64d_a2 = {}
b64d_b1 = {}
b64d_b2 = {}
b64d_c1 = {}
b64d_z = b64e[0]

for k,v in pairs(b64d) do
-- Each comment shows the rough C expression that would be used to
-- generate the returned triple.
--
b64d_a1 [k] = v*4 -- ([b1] ) << 2
b64d_a2 [k] = math.floor( v / 16 ) -- ([b2] & 0x30) >> 4
b64d_b1 [k] = ext( v, 0, 4 ) * 16 -- ([b2] & 0x0F) << 4
b64d_b2 [k] = math.floor( v / 4 ) -- ([b3] & 0x3c) >> 2
b64d_c1 [k] = ext( v, 0, 2 ) * 64 -- ([b3] & 0x03) << 6
end

if c_alpha._term ~= "" then
tail_padd64[1]=string.char(c_alpha._term:byte(),c_alpha._term:byte())
tail_padd64[2]=string.char(c_alpha._term:byte())
else
tail_padd64[1]=""
tail_padd64[2]=""
end

local esc_term

if magic[c_alpha._term] ~= nil then
esc_term=c_alpha._term:gsub(magic[c_alpha._term],function (s) return magic[s] end)
elseif c_alpha._term == "%" then
esc_term = "%%"
else
esc_term=c_alpha._term
end

if not c_alpha._strip then
local p=s:gsub("%%",function (s) return "__unique__" end)
for k,v in pairs(magic)
do
p=p:gsub(v,function (s) return magic[s] end )
end
local mr=p:gsub("__unique__",function() return "%%" end)

c_alpha._strip = string.format("[^%s%s]",mr,esc_term)
end

assert( c_alpha._strip )

pattern_strip = c_alpha._strip

local c =0 for i in pairs(b64d) do c=c+1 end

assert( c_alpha._alpha == s, "Integrity error." )
assert( c == 64, "The alphabet must be 64 unique values." )
if esc_term ~= "" then
assert( not c_alpha._alpha:find(esc_term), "Tail characters must not exist in alphabet." )
end

if known_base64_alphabets[alpha] == nil then
known_base64_alphabets[alpha]=c_alpha
end
end

return c_alpha._alpha,c_alpha._term
end


--------------------------------------------------------------------------------
-- encode64
--
-- Entry point mode selector.
--
--
local function encode64(i,o)
local method

if o ~= nil and io.type(o) == "file" then
local file_out = o
o = function(s) file_out:write(s) end
end

if type(i) == "string" then
if type(o) == "function" then
method = encode64_with_predicate
else
assert( o == nil, "unsupported request")
method = encode64_tostring
end
elseif io.type(i) == "file" then
assert( type(o) == "function", "file source requires output predicate")
i = encode64_io_iterator(i)
method = encode64_with_ii
else
assert( false, "unsupported mode" )
end

return method(i,o)
end


--------------------------------------------------------------------------------
-- decode64
--
-- Entry point mode selector.
--
--
local function decode64(i,o)
local method

if o ~= nil and io.type(o) == "file" then
local file_out = o
o = function(s) file_out:write(s) end
end

if type(i) == "string" then
if type(o) == "function" then
method = decode64_with_predicate
else
assert( o == nil, "unsupported request")
method = decode64_tostring
end
elseif io.type(i) == "file" then
assert( type(o) == "function", "file source requires output predicate")
i = decode64_io_iterator(i)
method = decode64_with_ii
else
assert( false, "unsupported mode" )
end

return method(i,o)
end

set_and_get_alphabet("base64")

--[[**************************************************************************]]
--[[****************************** Module **********************************]]
--[[**************************************************************************]]




function p.base64(frame)
function p.base64(frame)
Line 740: Line 16:
args = frame
args = frame
end
end
data = args.data
data = frame.args.data
encode = args.encode
encode = frame.args.encode
yesno(encode, false)
yesno(encode, false)


return "Sorry, I gave up that time"
if (encode == true) then
base64.encode(data)
return
{
encode = encode64,
decode = decode64,
alpha = set_and_get_alphabet,
}
else
base64.decode(data)
return
{
encode = encode64,
decode = decode64,
alpha = set_and_get_alphabet,
}
end
end
end

return p
return p

Latest revision as of 15:48, 14 January 2021

Documentation for this module may be created at Module:Base64/doc

-- Made by [[User:Celeste]] - Licensed under GPLv3

local p = {}

local yesno = require('Module:Yesno') -- invoke Yesno

local b='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -- You will need this for encoding/decoding

function p.base64(frame)
	local args
	local data
	local encode
	if frame == mw.getCurrentFrame() then
    	args = frame.args
	else
    	args = frame
	end
	data = frame.args.data
	encode = frame.args.encode
	yesno(encode, false)

	return "Sorry, I gave up that time"
end

return p