Module:Base64: Difference between revisions
Jump to navigation
Jump to search
Content added Content deleted
No edit summary |
No edit summary |
||
Line 7: | Line 7: | ||
local b='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -- You will need this for encoding/decoding |
local b='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -- You will need this for encoding/decoding |
||
function p.base64(frame) |
|||
local args |
|||
local data |
|||
local encode |
|||
if frame == mw.getCurrentFrame() then |
|||
args = frame.args |
|||
else |
|||
args = frame |
|||
end |
|||
data = args.data |
|||
encode = args.encode |
|||
yesno(encode, false) |
|||
--[[**************************************************************************]] |
|||
-- base64.lua |
|||
-- Copyright 2014 Ernest R. Ewert |
|||
-- |
|||
-- This Lua module contains the implementation of a Lua base64 encode |
|||
-- and decode library. |
|||
-- |
|||
-- The library exposes these methods. |
|||
-- |
|||
-- Method Args |
|||
-- ----------- ---------------------------------------------- |
|||
-- encode String in / out |
|||
-- decode String in / out |
|||
-- |
|||
-- encode String, function(value) predicate |
|||
-- decode String, function(value) predicate |
|||
-- |
|||
-- encode file, function(value) predicate |
|||
-- deocde file, function(value) predicate |
|||
-- |
|||
-- encode file, file |
|||
-- deocde file, file |
|||
-- |
|||
-- alpha alphabet, term char |
|||
-- |
|||
-- bitshift functions (<<, >> equivalent) |
|||
-------------------------------------------------------------------------------- |
|||
-- shift left |
|||
-- known_base64_alphabets |
|||
function lsh(value,shift) |
|||
-- |
|||
return (value*(2^shift)) % 256 |
|||
-- |
|||
local known_base64_alphabets= |
|||
{ |
|||
base64= -- RFC 2045 (Ignores max line length restrictions) |
|||
{ |
|||
_alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", |
|||
_strip="[^%a%d%+%/%=]", |
|||
_term="=" |
|||
}, |
|||
base64noterm= -- RFC 2045 (Ignores max line length restrictions) |
|||
{ |
|||
_alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", |
|||
_strip="[^%a%d%+%/]", |
|||
_term="" |
|||
}, |
|||
base64url= -- RFC 4648 'base64url' |
|||
{ |
|||
_alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", |
|||
_strip="[^%a%d%+%-%_=]", |
|||
_term="" |
|||
}, |
|||
} |
|||
local c_alpha=known_base64_alphabets.base64 |
|||
local pattern_strip |
|||
--[[**************************************************************************]] |
|||
--[[****************************** Encoding **********************************]] |
|||
--[[**************************************************************************]] |
|||
-- Precomputed tables (compromise using more memory for speed) |
|||
local b64e -- 6 bit patterns to ANSI 'char' values |
|||
local b64e_a -- ready to use |
|||
local b64e_a2 -- byte addend |
|||
local b64e_b1 -- byte addend |
|||
local b64e_b2 -- byte addend |
|||
local b64e_c1 -- byte addend |
|||
local b64e_c -- ready to use |
|||
-- Tail padding values |
|||
local tail_padd64= |
|||
{ |
|||
"==", -- two bytes modulo |
|||
"=" -- one byte modulo |
|||
} |
|||
-------------------------------------------------------------------------------- |
|||
-- e64 |
|||
-- |
|||
-- Helper function to convert three eight bit values into four encoded |
|||
-- 6 (significant) bit values. |
|||
-- |
|||
-- 7 0 7 0 7 0 |
|||
-- e64(a a a a a a a a,b b b b b b b b,c c c c c c c c) |
|||
-- | | | | |
|||
-- return [ a a a a a a]| | | |
|||
-- [ a a b b b b]| | |
|||
-- [ b b b b c c]| |
|||
-- [ c c c c c c] |
|||
-- |
|||
local function e64( a, b, c ) |
|||
-- Return pre-calculated values for encoded value 1 and 4 |
|||
-- Get the pre-calculated extractions for value 2 and 3, look them |
|||
-- up and return the proper value. |
|||
-- |
|||
return b64e_a[a], |
|||
b64e[ b64e_a2[a]+b64e_b1[b] ], |
|||
b64e[ b64e_b2[b]+b64e_c1[c] ], |
|||
b64e_c[c] |
|||
end |
end |
||
-- shift right |
|||
-------------------------------------------------------------------------------- |
|||
function rsh(value,shift) |
|||
-- encode_tail64 |
|||
return math.floor(value/2^shift) % 256 |
|||
-- |
|||
-- Send a tail pad value to the output predicate provided. |
|||
-- |
|||
local function encode_tail64( out, x, y ) |
|||
-- If we have a number of input bytes that isn't exactly divisible |
|||
-- by 3 then we need to pad the tail |
|||
if x ~= nil then |
|||
local a,b,r = x,0,1 |
|||
if y ~= nil then |
|||
r = 2 |
|||
b = y |
|||
end |
|||
-- Encode three bytes of info, with the tail byte as zeros and |
|||
-- ignore any fourth encoded ASCII value. (We should NOT have a |
|||
-- forth byte at this point.) |
|||
local b1, b2, b3 = e64( a, b, 0 ) |
|||
-- always add the first 2 six bit values to the res table |
|||
-- 1 remainder input byte needs 8 output bits |
|||
local tail_value = string.char( b1, b2 ) |
|||
-- two remainder input bytes will need 18 output bits (2 as pad) |
|||
if r == 2 then |
|||
tail_value=tail_value..string.char( b3 ) |
|||
end |
|||
-- send the last 4 byte sequence with appropriate tail padding |
|||
out( tail_value .. tail_padd64[r] ) |
|||
end |
|||
end |
end |
||
-- return single bit (for OR) |
|||
-------------------------------------------------------------------------------- |
|||
function bit(x,b) |
|||
-- encode64_io_iterator |
|||
return (x % 2^b - x % 2^(b-1) > 0) |
|||
-- |
|||
-- Create an io input iterator to read an input file and split values for |
|||
-- proper encoding. |
|||
-- |
|||
local function encode64_io_iterator(file) |
|||
assert( io.type(file) == "file", "argument must be readable file handle" ) |
|||
assert( file.read ~= nil, "argument must be readable file handle" ) |
|||
local ii = { } -- Table for the input iterator |
|||
setmetatable(ii,{ __tostring=function() return "base64.io_iterator" end}) |
|||
-- Begin returns an input read iterator |
|||
-- |
|||
function ii.begin() |
|||
local sb = string.byte |
|||
-- The iterator returns three bytes from the file for encoding or nil |
|||
-- when the end of the file has been reached. |
|||
-- |
|||
return function() |
|||
s = file:read(3) |
|||
if s ~= nil and #s == 3 then |
|||
return sb(s,1,3) |
|||
end |
|||
return nil |
|||
end |
|||
end |
|||
-- The tail method on the iterator allows the routines to run faster |
|||
-- because each sequence of bytes doesn't have to test for EOF. |
|||
-- |
|||
function ii.tail() |
|||
-- If one or two "overflow" bytes exist, return those. |
|||
-- |
|||
if s ~= nil then return s:byte(1,2) end |
|||
end |
|||
return ii |
|||
end |
end |
||
-- logic OR for number values |
|||
-------------------------------------------------------------------------------- |
|||
function lor(x,y) |
|||
-- encode64_with_ii |
|||
result = 0 |
|||
-- |
|||
for p=1,8 do result = result + (((bit(x,p) or bit(y,p)) == true) and 2^(p-1) or 0) end |
|||
-- Convert the value provided by an encode iterator that provides a begin |
|||
return result |
|||
-- method, a tail method, and an iterator that returns three bytes for |
|||
-- each call until at the end. The tail method should return either 1 or 2 |
|||
-- tail bytes (for source values that are not evenly divisible by three). |
|||
-- |
|||
local function encode64_with_ii( ii, out ) |
|||
local sc=string.char |
|||
for a, b, c in ii.begin() do |
|||
out( sc( e64( a, b, c ) ) ) |
|||
end |
|||
encode_tail64( out, ii.tail() ) |
|||
end |
end |
||
-- encryption table |
|||
local base64chars = {[0]='A',[1]='B',[2]='C',[3]='D',[4]='E',[5]='F',[6]='G',[7]='H',[8]='I',[9]='J',[10]='K',[11]='L',[12]='M',[13]='N',[14]='O',[15]='P',[16]='Q',[17]='R',[18]='S',[19]='T',[20]='U',[21]='V',[22]='W',[23]='X',[24]='Y',[25]='Z',[26]='a',[27]='b',[28]='c',[29]='d',[30]='e',[31]='f',[32]='g',[33]='h',[34]='i',[35]='j',[36]='k',[37]='l',[38]='m',[39]='n',[40]='o',[41]='p',[42]='q',[43]='r',[44]='s',[45]='t',[46]='u',[47]='v',[48]='w',[49]='x',[50]='y',[51]='z',[52]='0',[53]='1',[54]='2',[55]='3',[56]='4',[57]='5',[58]='6',[59]='7',[60]='8',[61]='9',[62]='-',[63]='_'} |
|||
-------------------------------------------------------------------------------- |
|||
-- function encode |
|||
-- encode64_with_predicate |
|||
-- encodes input string to base64. |
|||
-- |
|||
function enc(data) |
|||
-- Implements the basic raw data --> base64 conversion. Each three byte |
|||
local bytes = {} |
|||
-- sequence in the input string is converted to the encoded string and |
|||
local result = "" |
|||
-- given to the predicate provided in 4 output byte chunks. This method |
|||
for spos=0,string.len(data)-1,3 do |
|||
-- is slightly faster for traversing existing strings in memory. |
|||
for byte=1,3 do bytes[byte] = string.byte(string.sub(data,(spos+byte))) or 0 end |
|||
-- |
|||
result = string.format('%s%s%s%s%s',result,base64chars[rsh(bytes[1],2)],base64chars[lor(lsh((bytes[1] % 4),4), rsh(bytes[2],4))] or "=",((#data-spos) > 1) and base64chars[lor(lsh(bytes[2] % 16,2), rsh(bytes[3],6))] or "=",((#data-spos) > 2) and base64chars[(bytes[3] % 64)] or "=") |
|||
local function encode64_with_predicate( raw, out ) |
|||
end |
|||
local rem=#raw%3 -- remainder |
|||
return result |
|||
local len=#raw-rem -- 3 byte input adjusted |
|||
local sb=string.byte -- Mostly notational (slight performance) |
|||
local sc=string.char -- Mostly notational (slight performance) |
|||
-- Main encode loop converts three input bytes to 4 base64 encoded |
|||
-- ACSII values and calls the predicate with the value. |
|||
for i=1,len,3 do |
|||
-- This really isn't intended as obfuscation. It is more about |
|||
-- loop optimization and removing temporaries. |
|||
-- |
|||
out( sc( e64( sb( raw ,i , i+3 ) ) ) ) |
|||
-- | | | |
|||
-- | | byte i to i + 3 |
|||
-- | | |
|||
-- | returns 4 encoded values |
|||
-- | |
|||
-- creates a string with the 4 returned values |
|||
end |
|||
-- If we have a number of input bytes that isn't exactly divisible |
|||
-- by 3 then we need to pad the tail |
|||
if rem > 0 then |
|||
local x, y = sb( raw, len+1 ) |
|||
if rem > 1 then |
|||
y = sb( raw, len+2 ) |
|||
end |
|||
encode_tail64( out, x, y ) |
|||
end |
|||
end |
end |
||
-- decryption table |
|||
local base64bytes = {['A']=0,['B']=1,['C']=2,['D']=3,['E']=4,['F']=5,['G']=6,['H']=7,['I']=8,['J']=9,['K']=10,['L']=11,['M']=12,['N']=13,['O']=14,['P']=15,['Q']=16,['R']=17,['S']=18,['T']=19,['U']=20,['V']=21,['W']=22,['X']=23,['Y']=24,['Z']=25,['a']=26,['b']=27,['c']=28,['d']=29,['e']=30,['f']=31,['g']=32,['h']=33,['i']=34,['j']=35,['k']=36,['l']=37,['m']=38,['n']=39,['o']=40,['p']=41,['q']=42,['r']=43,['s']=44,['t']=45,['u']=46,['v']=47,['w']=48,['x']=49,['y']=50,['z']=51,['0']=52,['1']=53,['2']=54,['3']=55,['4']=56,['5']=57,['6']=58,['7']=59,['8']=60,['9']=61,['-']=62,['_']=63,['=']=nil} |
|||
-------------------------------------------------------------------------------- |
|||
-- function decode |
|||
-- encode64_tostring |
|||
-- decode base64 input to string |
|||
-- |
|||
function dec(data) |
|||
-- Convenience method that accepts a string value and returns the |
|||
local chars = {} |
|||
-- encoded version of that string. |
|||
local result="" |
|||
-- |
|||
for dpos=0,string.len(data)-1,4 do |
|||
local function encode64_tostring(raw) |
|||
for char=1,4 do chars[char] = base64bytes[(string.sub(data,(dpos+char),(dpos+char)) or "=")] end |
|||
result = string.format('%s%s%s%s',result,string.char(lor(lsh(chars[1],2), rsh(chars[2],4))),(chars[3] ~= nil) and string.char(lor(lsh(chars[2],4), rsh(chars[3],2))) or "",(chars[4] ~= nil) and string.char(lor(lsh(chars[3],6) % 192, (chars[4]))) or "") |
|||
local sb={} -- table to build string |
|||
end |
|||
return result |
|||
local function collection_predicate(v) |
|||
sb[#sb+1]=v |
|||
end |
|||
-- Test with an 818K string in memory. Result is 1.1M of data. |
|||
-- |
|||
-- lua_base64 base64 (gnu 8.21) |
|||
-- 202ms 54ms |
|||
-- 203ms 48ms |
|||
-- 204ms 50ms |
|||
-- 203ms 42ms |
|||
-- 205ms 46ms |
|||
-- |
|||
encode64_with_predicate( raw, collection_predicate ) |
|||
return table.concat(sb) |
|||
end |
end |
||
--[[**************************************************************************]] |
|||
--[[****************************** Decoding **********************************]] |
|||
--[[**************************************************************************]] |
|||
-- Precomputed tables (compromise using more memory for speed) |
|||
local b64d -- ANSI 'char' to right shifted bit pattern |
|||
local b64d_a1 -- byte addend |
|||
local b64d_a2 -- byte addend |
|||
local b64d_b1 -- byte addend |
|||
local b64d_b2 -- byte addend |
|||
local b64d_c1 -- byte addend |
|||
local b64d_z -- zero |
|||
-------------------------------------------------------------------------------- |
|||
-- d64 |
|||
-- |
|||
-- Helper function to convert four six bit values into three full eight |
|||
-- bit values. Input values are the integer expression of the six bit value |
|||
-- encoded in the original base64 encoded string. |
|||
-- |
|||
-- d64( _ _1 1 1 1 1 1, |
|||
-- | _ _ 2 2 2 2 2 2, |
|||
-- | | _ _ 3 3 3 3 3 3, |
|||
-- | | | _ _ 4 4 4 4 4 4) |
|||
-- | | | | |
|||
-- return ', [1 1 1 1 1 1 2 2] | | |
|||
-- ', [2 2 2 2 3 3 3 3] | |
|||
-- ' [3 3 4 4 4 4 4 4] |
|||
-- |
|||
local function d64( b1, b2, b3, b4 ) |
|||
-- We can get away with addition instead of anding the values together |
|||
-- because there are no overlapping bit patterns. |
|||
-- |
|||
return |
|||
b64d_a1[b1] + b64d_a2[b2], |
|||
b64d_b1[b2] + b64d_b2[b3], |
|||
b64d_c1[b3] + b64d[b4] |
|||
end |
|||
-------------------------------------------------------------------------------- |
|||
-- decode_tail64 |
|||
-- |
|||
-- Send the end of stream bytes that didn't get decoded via the main loop. |
|||
-- |
|||
local function decode_tail64( out, e1, e2 ,e3, e4 ) |
|||
if tail_padd64[2] == "" or e4 == tail_padd64[2]:byte() then |
|||
local n3 = b64d_z |
|||
if e3 ~= nil and e3 ~= tail_padd64[2]:byte() then |
|||
n3 = e3 |
|||
end |
|||
-- Unpack the six bit values into the 8 bit values |
|||
local b1, b2 = d64( e1, e2, n3, b64d_z ) |
|||
-- And add them to the res table |
|||
if e3 ~= nil and e3 ~= tail_padd64[2]:byte() then |
|||
out( string.char( b1, b2 ) ) |
|||
else |
|||
out( string.char( b1 ) ) |
|||
end |
|||
end |
|||
end |
|||
-------------------------------------------------------------------------------- |
|||
-- decode64_io_iterator |
|||
-- |
|||
-- Create an io input iterator to read an input file and split values for |
|||
-- proper decoding. |
|||
-- |
|||
local function decode64_io_iterator( file ) |
|||
local ii = { } |
|||
-- An enumeration coroutine that handles the reading of an input file |
|||
-- to break data into proper pieces for building the original string. |
|||
-- |
|||
local function enummerate( file ) |
|||
local sc=string.char |
|||
local sb=string.byte |
|||
local ll="" -- last line storage |
|||
local len |
|||
local yield = coroutine.yield |
|||
-- Read a "reasonable amount" of data into the line buffer. Line by |
|||
-- line is not used so that a file with no line breaks doesn't |
|||
-- cause an inordinate amount of memory usage. |
|||
-- |
|||
for cl in file:lines(2048) do |
|||
-- Reset the current line to contain valid chars and any previous |
|||
-- "leftover" bytes from the previous read |
|||
-- |
|||
cl = ll .. cl:gsub(pattern_strip,"") |
|||
-- | | |
|||
-- | Remove "Invalid" chars (white space etc) |
|||
-- | |
|||
-- Left over from last line |
|||
-- |
|||
len = (#cl-4)-(#cl%4) |
|||
-- see the comments in decode64_with_predicate for a rundown of |
|||
-- the results of this loop (sans the coroutine) |
|||
for i=1,len,4 do |
|||
yield( sc( d64( sb( cl, i, i+4 ) ) ) ) |
|||
end |
|||
ll = cl:sub( len +1, #cl ) |
|||
end |
|||
local l = #ll |
|||
if l >= 4 and ll:sub(-1) ~= tail_padd64[2] then |
|||
yield( sc( d64( sb( ll, 1, 4 ) ) ) ) |
|||
l=l-4 |
|||
end |
|||
if l > 0 then |
|||
local e1,e2,e3,e4 = ll:byte( 0 - l, -1 ) |
|||
if e1 ~= nil then |
|||
decode_tail64( function(s) yield( s ) end, e1, e2, e3, e4 ) |
|||
end |
|||
end |
|||
end |
|||
-- Returns an input iterator that is implemented as a coroutine. Each |
|||
-- yield of the co-routine sends reconstructed bytes to the loop handling |
|||
-- the iteration. |
|||
-- |
|||
function ii.begin() |
|||
local co = coroutine.create( function() enummerate(file) end ) |
|||
return function() |
|||
local code,res = coroutine.resume(co) |
|||
assert(code == true) |
|||
return res |
|||
end |
|||
end |
|||
return ii |
|||
end |
|||
-------------------------------------------------------------------------------- |
|||
-- decode64_with_ii |
|||
-- |
|||
-- Convert the value provided by a decode iterator that provides a begin |
|||
-- method, a tail method, and an iterator that returns four (usable!) bytes |
|||
-- for each call until at the end. |
|||
-- |
|||
local function decode64_with_ii( ii, out ) |
|||
-- Uses the iterator to pull values. Each reconstructed string |
|||
-- is sent to the output predicate. |
|||
-- |
|||
for l in ii.begin() do out( l ) end |
|||
end |
|||
-------------------------------------------------------------------------------- |
|||
-- decode64_with_predicate |
|||
-- |
|||
-- Decode an entire base64 encoded string in memory using the predicate for |
|||
-- output. |
|||
-- |
|||
local function decode64_with_predicate( raw, out ) |
|||
-- Sanitize the input to strip characters that are not in the alphabet. |
|||
-- |
|||
-- Note: This is a deviation from strict implementations where "bad data" |
|||
-- in the input stream is unsupported. |
|||
-- |
|||
local san = raw:gsub(pattern_strip,"") |
|||
local len = #san-#san%4 |
|||
local rem = #san-len |
|||
local sc = string.char |
|||
local sb = string.byte |
|||
if san:sub(-1,-1) == tail_padd64[2] then |
|||
rem = rem + 4 |
|||
len = len - 4 |
|||
end |
|||
for i=1,len,4 do |
|||
out( sc( d64( sb( san, i, i+4 ) ) ) ) |
|||
end |
|||
if rem > 0 then |
|||
decode_tail64( out, sb( san, 0-rem, -1 ) ) |
|||
end |
|||
end |
|||
-------------------------------------------------------------------------------- |
|||
-- decode64_tostring |
|||
-- |
|||
-- Takes a string that is encoded in base64 and returns the decoded value in |
|||
-- a new string. |
|||
-- |
|||
local function decode64_tostring( raw ) |
|||
local sb={} -- table to build string |
|||
local function collection_predicate(v) |
|||
sb[#sb+1]=v |
|||
end |
|||
decode64_with_predicate( raw, collection_predicate ) |
|||
return table.concat(sb) |
|||
end |
|||
-------------------------------------------------------------------------------- |
|||
-- set_and_get_alphabet |
|||
-- |
|||
-- Sets and returns the encode / decode alphabet. |
|||
-- |
|||
-- |
|||
local function set_and_get_alphabet(alpha,term) |
|||
if alpha ~= nil then |
|||
local magic= |
|||
{ |
|||
-- ["%"]="%%", |
|||
[" "]="% ", |
|||
["^"]="%^", |
|||
["$"]="%$", |
|||
["("]="%(", |
|||
[")"]="%)", |
|||
["."]="%.", |
|||
["["]="%[", |
|||
["]"]="%]", |
|||
["*"]="%*", |
|||
["+"]="%+", |
|||
["-"]="%-", |
|||
["?"]="%?", |
|||
} |
|||
c_alpha=known_base64_alphabets[alpha] |
|||
if c_alpha == nil then |
|||
c_alpha={ _alpha=alpha, _term=term } |
|||
end |
|||
assert( #c_alpha._alpha == 64, "The alphabet ~must~ be 64 unique values." ) |
|||
assert( #c_alpha._term <= 1, "Specify zero or one termination character.") |
|||
b64d={} -- Decode table alpha -> right shifted int values |
|||
b64e={} -- Encode table 0-63 (6 bits) -> char table |
|||
local s="" |
|||
for i = 1,64 do |
|||
local byte = c_alpha._alpha:byte(i) |
|||
local str = string.char(byte) |
|||
b64e[i-1]=byte |
|||
assert( b64d[byte] == nil, "Duplicate value '"..str.."'" ) |
|||
b64d[byte]=i-1 |
|||
s=s..str |
|||
end |
|||
local ext --Alias for extraction routine that avoids extra table lookups |
|||
if bit32 then |
|||
ext = bit32.extract -- slight speed, vast visual (IMO) |
|||
elseif bit then |
|||
local band = bit.band |
|||
local rshift = bit.rshift |
|||
ext = |
|||
function(n, field, width) |
|||
width = width or 1 |
|||
return band(rshift(n, field), 2^width-1) |
|||
end |
|||
else |
|||
error("Neither Lua 5.2 bit32 nor LuaJit bit library found!") |
|||
end |
|||
-- preload encode lookup tables |
|||
b64e_a = {} |
|||
b64e_a2 = {} |
|||
b64e_b1 = {} |
|||
b64e_b2 = {} |
|||
b64e_c1 = {} |
|||
b64e_c = {} |
|||
for f = 0,255 do |
|||
b64e_a [f]=b64e[ext(f,2,6)] |
|||
b64e_a2 [f]=ext(f,0,2)*16 |
|||
b64e_b1 [f]=ext(f,4,4) |
|||
b64e_b2 [f]=ext(f,0,4)*4 |
|||
b64e_c1 [f]=ext(f,6,2) |
|||
b64e_c [f]=b64e[ext(f,0,6)] |
|||
end |
|||
-- preload decode lookup tables |
|||
b64d_a1 = {} |
|||
b64d_a2 = {} |
|||
b64d_b1 = {} |
|||
b64d_b2 = {} |
|||
b64d_c1 = {} |
|||
b64d_z = b64e[0] |
|||
for k,v in pairs(b64d) do |
|||
-- Each comment shows the rough C expression that would be used to |
|||
-- generate the returned triple. |
|||
-- |
|||
b64d_a1 [k] = v*4 -- ([b1] ) << 2 |
|||
b64d_a2 [k] = math.floor( v / 16 ) -- ([b2] & 0x30) >> 4 |
|||
b64d_b1 [k] = ext( v, 0, 4 ) * 16 -- ([b2] & 0x0F) << 4 |
|||
b64d_b2 [k] = math.floor( v / 4 ) -- ([b3] & 0x3c) >> 2 |
|||
b64d_c1 [k] = ext( v, 0, 2 ) * 64 -- ([b3] & 0x03) << 6 |
|||
end |
|||
if c_alpha._term ~= "" then |
|||
tail_padd64[1]=string.char(c_alpha._term:byte(),c_alpha._term:byte()) |
|||
tail_padd64[2]=string.char(c_alpha._term:byte()) |
|||
else |
|||
tail_padd64[1]="" |
|||
tail_padd64[2]="" |
|||
end |
|||
local esc_term |
|||
if magic[c_alpha._term] ~= nil then |
|||
esc_term=c_alpha._term:gsub(magic[c_alpha._term],function (s) return magic[s] end) |
|||
elseif c_alpha._term == "%" then |
|||
esc_term = "%%" |
|||
else |
|||
esc_term=c_alpha._term |
|||
end |
|||
if not c_alpha._strip then |
|||
local p=s:gsub("%%",function (s) return "__unique__" end) |
|||
for k,v in pairs(magic) |
|||
do |
|||
p=p:gsub(v,function (s) return magic[s] end ) |
|||
end |
|||
local mr=p:gsub("__unique__",function() return "%%" end) |
|||
c_alpha._strip = string.format("[^%s%s]",mr,esc_term) |
|||
end |
|||
assert( c_alpha._strip ) |
|||
pattern_strip = c_alpha._strip |
|||
local c =0 for i in pairs(b64d) do c=c+1 end |
|||
assert( c_alpha._alpha == s, "Integrity error." ) |
|||
assert( c == 64, "The alphabet must be 64 unique values." ) |
|||
if esc_term ~= "" then |
|||
assert( not c_alpha._alpha:find(esc_term), "Tail characters must not exist in alphabet." ) |
|||
end |
|||
if known_base64_alphabets[alpha] == nil then |
|||
known_base64_alphabets[alpha]=c_alpha |
|||
end |
|||
end |
|||
return c_alpha._alpha,c_alpha._term |
|||
end |
|||
-------------------------------------------------------------------------------- |
|||
-- encode64 |
|||
-- |
|||
-- Entry point mode selector. |
|||
-- |
|||
-- |
|||
local function encode64(i,o) |
|||
local method |
|||
if o ~= nil and io.type(o) == "file" then |
|||
local file_out = o |
|||
o = function(s) file_out:write(s) end |
|||
end |
|||
if type(i) == "string" then |
|||
if type(o) == "function" then |
|||
method = encode64_with_predicate |
|||
else |
|||
assert( o == nil, "unsupported request") |
|||
method = encode64_tostring |
|||
end |
|||
elseif io.type(i) == "file" then |
|||
assert( type(o) == "function", "file source requires output predicate") |
|||
i = encode64_io_iterator(i) |
|||
method = encode64_with_ii |
|||
else |
|||
assert( false, "unsupported mode" ) |
|||
end |
|||
return method(i,o) |
|||
end |
|||
-------------------------------------------------------------------------------- |
|||
-- decode64 |
|||
-- |
|||
-- Entry point mode selector. |
|||
-- |
|||
-- |
|||
local function decode64(i,o) |
|||
local method |
|||
if o ~= nil and io.type(o) == "file" then |
|||
local file_out = o |
|||
o = function(s) file_out:write(s) end |
|||
end |
|||
if type(i) == "string" then |
|||
if type(o) == "function" then |
|||
method = decode64_with_predicate |
|||
else |
|||
assert( o == nil, "unsupported request") |
|||
method = decode64_tostring |
|||
end |
|||
elseif io.type(i) == "file" then |
|||
assert( type(o) == "function", "file source requires output predicate") |
|||
i = decode64_io_iterator(i) |
|||
method = decode64_with_ii |
|||
else |
|||
assert( false, "unsupported mode" ) |
|||
end |
|||
return method(i,o) |
|||
end |
|||
set_and_get_alphabet("base64") |
|||
--[[**************************************************************************]] |
|||
--[[****************************** Module **********************************]] |
|||
--[[**************************************************************************]] |
|||
function p.base64(frame) |
|||
local args |
|||
local data |
|||
local encode |
|||
if frame == mw.getCurrentFrame() then |
|||
args = frame.args |
|||
else |
|||
args = frame |
|||
end |
|||
data = args.data |
|||
encode = args.encode |
|||
yesno(encode, false) |
|||
if (encode == true) then |
if (encode == true) then |
||
base64.encode(data) |
|||
return |
|||
{ |
|||
encode = encode64, |
|||
decode = decode64, |
|||
alpha = set_and_get_alphabet, |
|||
} |
|||
else |
else |
||
base64.decode(data) |
|||
return |
|||
{ |
|||
encode = encode64, |
|||
decode = decode64, |
|||
alpha = set_and_get_alphabet, |
|||
} |
|||
end |
end |
||
end |
end |
||
return p |
return p |
Revision as of 14:58, 14 January 2021
Documentation for this module may be created at Module:Base64/doc
-- Made by [[User:Celeste]] - Licensed under GPLv3
local p = {}
local yesno = require('Module:Yesno') -- invoke Yesno
local b='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -- You will need this for encoding/decoding
--[[**************************************************************************]]
-- base64.lua
-- Copyright 2014 Ernest R. Ewert
--
-- This Lua module contains the implementation of a Lua base64 encode
-- and decode library.
--
-- The library exposes these methods.
--
-- Method Args
-- ----------- ----------------------------------------------
-- encode String in / out
-- decode String in / out
--
-- encode String, function(value) predicate
-- decode String, function(value) predicate
--
-- encode file, function(value) predicate
-- deocde file, function(value) predicate
--
-- encode file, file
-- deocde file, file
--
-- alpha alphabet, term char
--
--------------------------------------------------------------------------------
-- known_base64_alphabets
--
--
local known_base64_alphabets=
{
base64= -- RFC 2045 (Ignores max line length restrictions)
{
_alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
_strip="[^%a%d%+%/%=]",
_term="="
},
base64noterm= -- RFC 2045 (Ignores max line length restrictions)
{
_alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
_strip="[^%a%d%+%/]",
_term=""
},
base64url= -- RFC 4648 'base64url'
{
_alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
_strip="[^%a%d%+%-%_=]",
_term=""
},
}
local c_alpha=known_base64_alphabets.base64
local pattern_strip
--[[**************************************************************************]]
--[[****************************** Encoding **********************************]]
--[[**************************************************************************]]
-- Precomputed tables (compromise using more memory for speed)
local b64e -- 6 bit patterns to ANSI 'char' values
local b64e_a -- ready to use
local b64e_a2 -- byte addend
local b64e_b1 -- byte addend
local b64e_b2 -- byte addend
local b64e_c1 -- byte addend
local b64e_c -- ready to use
-- Tail padding values
local tail_padd64=
{
"==", -- two bytes modulo
"=" -- one byte modulo
}
--------------------------------------------------------------------------------
-- e64
--
-- Helper function to convert three eight bit values into four encoded
-- 6 (significant) bit values.
--
-- 7 0 7 0 7 0
-- e64(a a a a a a a a,b b b b b b b b,c c c c c c c c)
-- | | | |
-- return [ a a a a a a]| | |
-- [ a a b b b b]| |
-- [ b b b b c c]|
-- [ c c c c c c]
--
local function e64( a, b, c )
-- Return pre-calculated values for encoded value 1 and 4
-- Get the pre-calculated extractions for value 2 and 3, look them
-- up and return the proper value.
--
return b64e_a[a],
b64e[ b64e_a2[a]+b64e_b1[b] ],
b64e[ b64e_b2[b]+b64e_c1[c] ],
b64e_c[c]
end
--------------------------------------------------------------------------------
-- encode_tail64
--
-- Send a tail pad value to the output predicate provided.
--
local function encode_tail64( out, x, y )
-- If we have a number of input bytes that isn't exactly divisible
-- by 3 then we need to pad the tail
if x ~= nil then
local a,b,r = x,0,1
if y ~= nil then
r = 2
b = y
end
-- Encode three bytes of info, with the tail byte as zeros and
-- ignore any fourth encoded ASCII value. (We should NOT have a
-- forth byte at this point.)
local b1, b2, b3 = e64( a, b, 0 )
-- always add the first 2 six bit values to the res table
-- 1 remainder input byte needs 8 output bits
local tail_value = string.char( b1, b2 )
-- two remainder input bytes will need 18 output bits (2 as pad)
if r == 2 then
tail_value=tail_value..string.char( b3 )
end
-- send the last 4 byte sequence with appropriate tail padding
out( tail_value .. tail_padd64[r] )
end
end
--------------------------------------------------------------------------------
-- encode64_io_iterator
--
-- Create an io input iterator to read an input file and split values for
-- proper encoding.
--
local function encode64_io_iterator(file)
assert( io.type(file) == "file", "argument must be readable file handle" )
assert( file.read ~= nil, "argument must be readable file handle" )
local ii = { } -- Table for the input iterator
setmetatable(ii,{ __tostring=function() return "base64.io_iterator" end})
-- Begin returns an input read iterator
--
function ii.begin()
local sb = string.byte
-- The iterator returns three bytes from the file for encoding or nil
-- when the end of the file has been reached.
--
return function()
s = file:read(3)
if s ~= nil and #s == 3 then
return sb(s,1,3)
end
return nil
end
end
-- The tail method on the iterator allows the routines to run faster
-- because each sequence of bytes doesn't have to test for EOF.
--
function ii.tail()
-- If one or two "overflow" bytes exist, return those.
--
if s ~= nil then return s:byte(1,2) end
end
return ii
end
--------------------------------------------------------------------------------
-- encode64_with_ii
--
-- Convert the value provided by an encode iterator that provides a begin
-- method, a tail method, and an iterator that returns three bytes for
-- each call until at the end. The tail method should return either 1 or 2
-- tail bytes (for source values that are not evenly divisible by three).
--
local function encode64_with_ii( ii, out )
local sc=string.char
for a, b, c in ii.begin() do
out( sc( e64( a, b, c ) ) )
end
encode_tail64( out, ii.tail() )
end
--------------------------------------------------------------------------------
-- encode64_with_predicate
--
-- Implements the basic raw data --> base64 conversion. Each three byte
-- sequence in the input string is converted to the encoded string and
-- given to the predicate provided in 4 output byte chunks. This method
-- is slightly faster for traversing existing strings in memory.
--
local function encode64_with_predicate( raw, out )
local rem=#raw%3 -- remainder
local len=#raw-rem -- 3 byte input adjusted
local sb=string.byte -- Mostly notational (slight performance)
local sc=string.char -- Mostly notational (slight performance)
-- Main encode loop converts three input bytes to 4 base64 encoded
-- ACSII values and calls the predicate with the value.
for i=1,len,3 do
-- This really isn't intended as obfuscation. It is more about
-- loop optimization and removing temporaries.
--
out( sc( e64( sb( raw ,i , i+3 ) ) ) )
-- | | |
-- | | byte i to i + 3
-- | |
-- | returns 4 encoded values
-- |
-- creates a string with the 4 returned values
end
-- If we have a number of input bytes that isn't exactly divisible
-- by 3 then we need to pad the tail
if rem > 0 then
local x, y = sb( raw, len+1 )
if rem > 1 then
y = sb( raw, len+2 )
end
encode_tail64( out, x, y )
end
end
--------------------------------------------------------------------------------
-- encode64_tostring
--
-- Convenience method that accepts a string value and returns the
-- encoded version of that string.
--
local function encode64_tostring(raw)
local sb={} -- table to build string
local function collection_predicate(v)
sb[#sb+1]=v
end
-- Test with an 818K string in memory. Result is 1.1M of data.
--
-- lua_base64 base64 (gnu 8.21)
-- 202ms 54ms
-- 203ms 48ms
-- 204ms 50ms
-- 203ms 42ms
-- 205ms 46ms
--
encode64_with_predicate( raw, collection_predicate )
return table.concat(sb)
end
--[[**************************************************************************]]
--[[****************************** Decoding **********************************]]
--[[**************************************************************************]]
-- Precomputed tables (compromise using more memory for speed)
local b64d -- ANSI 'char' to right shifted bit pattern
local b64d_a1 -- byte addend
local b64d_a2 -- byte addend
local b64d_b1 -- byte addend
local b64d_b2 -- byte addend
local b64d_c1 -- byte addend
local b64d_z -- zero
--------------------------------------------------------------------------------
-- d64
--
-- Helper function to convert four six bit values into three full eight
-- bit values. Input values are the integer expression of the six bit value
-- encoded in the original base64 encoded string.
--
-- d64( _ _1 1 1 1 1 1,
-- | _ _ 2 2 2 2 2 2,
-- | | _ _ 3 3 3 3 3 3,
-- | | | _ _ 4 4 4 4 4 4)
-- | | | |
-- return ', [1 1 1 1 1 1 2 2] | |
-- ', [2 2 2 2 3 3 3 3] |
-- ' [3 3 4 4 4 4 4 4]
--
local function d64( b1, b2, b3, b4 )
-- We can get away with addition instead of anding the values together
-- because there are no overlapping bit patterns.
--
return
b64d_a1[b1] + b64d_a2[b2],
b64d_b1[b2] + b64d_b2[b3],
b64d_c1[b3] + b64d[b4]
end
--------------------------------------------------------------------------------
-- decode_tail64
--
-- Send the end of stream bytes that didn't get decoded via the main loop.
--
local function decode_tail64( out, e1, e2 ,e3, e4 )
if tail_padd64[2] == "" or e4 == tail_padd64[2]:byte() then
local n3 = b64d_z
if e3 ~= nil and e3 ~= tail_padd64[2]:byte() then
n3 = e3
end
-- Unpack the six bit values into the 8 bit values
local b1, b2 = d64( e1, e2, n3, b64d_z )
-- And add them to the res table
if e3 ~= nil and e3 ~= tail_padd64[2]:byte() then
out( string.char( b1, b2 ) )
else
out( string.char( b1 ) )
end
end
end
--------------------------------------------------------------------------------
-- decode64_io_iterator
--
-- Create an io input iterator to read an input file and split values for
-- proper decoding.
--
local function decode64_io_iterator( file )
local ii = { }
-- An enumeration coroutine that handles the reading of an input file
-- to break data into proper pieces for building the original string.
--
local function enummerate( file )
local sc=string.char
local sb=string.byte
local ll="" -- last line storage
local len
local yield = coroutine.yield
-- Read a "reasonable amount" of data into the line buffer. Line by
-- line is not used so that a file with no line breaks doesn't
-- cause an inordinate amount of memory usage.
--
for cl in file:lines(2048) do
-- Reset the current line to contain valid chars and any previous
-- "leftover" bytes from the previous read
--
cl = ll .. cl:gsub(pattern_strip,"")
-- | |
-- | Remove "Invalid" chars (white space etc)
-- |
-- Left over from last line
--
len = (#cl-4)-(#cl%4)
-- see the comments in decode64_with_predicate for a rundown of
-- the results of this loop (sans the coroutine)
for i=1,len,4 do
yield( sc( d64( sb( cl, i, i+4 ) ) ) )
end
ll = cl:sub( len +1, #cl )
end
local l = #ll
if l >= 4 and ll:sub(-1) ~= tail_padd64[2] then
yield( sc( d64( sb( ll, 1, 4 ) ) ) )
l=l-4
end
if l > 0 then
local e1,e2,e3,e4 = ll:byte( 0 - l, -1 )
if e1 ~= nil then
decode_tail64( function(s) yield( s ) end, e1, e2, e3, e4 )
end
end
end
-- Returns an input iterator that is implemented as a coroutine. Each
-- yield of the co-routine sends reconstructed bytes to the loop handling
-- the iteration.
--
function ii.begin()
local co = coroutine.create( function() enummerate(file) end )
return function()
local code,res = coroutine.resume(co)
assert(code == true)
return res
end
end
return ii
end
--------------------------------------------------------------------------------
-- decode64_with_ii
--
-- Convert the value provided by a decode iterator that provides a begin
-- method, a tail method, and an iterator that returns four (usable!) bytes
-- for each call until at the end.
--
local function decode64_with_ii( ii, out )
-- Uses the iterator to pull values. Each reconstructed string
-- is sent to the output predicate.
--
for l in ii.begin() do out( l ) end
end
--------------------------------------------------------------------------------
-- decode64_with_predicate
--
-- Decode an entire base64 encoded string in memory using the predicate for
-- output.
--
local function decode64_with_predicate( raw, out )
-- Sanitize the input to strip characters that are not in the alphabet.
--
-- Note: This is a deviation from strict implementations where "bad data"
-- in the input stream is unsupported.
--
local san = raw:gsub(pattern_strip,"")
local len = #san-#san%4
local rem = #san-len
local sc = string.char
local sb = string.byte
if san:sub(-1,-1) == tail_padd64[2] then
rem = rem + 4
len = len - 4
end
for i=1,len,4 do
out( sc( d64( sb( san, i, i+4 ) ) ) )
end
if rem > 0 then
decode_tail64( out, sb( san, 0-rem, -1 ) )
end
end
--------------------------------------------------------------------------------
-- decode64_tostring
--
-- Takes a string that is encoded in base64 and returns the decoded value in
-- a new string.
--
local function decode64_tostring( raw )
local sb={} -- table to build string
local function collection_predicate(v)
sb[#sb+1]=v
end
decode64_with_predicate( raw, collection_predicate )
return table.concat(sb)
end
--------------------------------------------------------------------------------
-- set_and_get_alphabet
--
-- Sets and returns the encode / decode alphabet.
--
--
local function set_and_get_alphabet(alpha,term)
if alpha ~= nil then
local magic=
{
-- ["%"]="%%",
[" "]="% ",
["^"]="%^",
["$"]="%$",
["("]="%(",
[")"]="%)",
["."]="%.",
["["]="%[",
["]"]="%]",
["*"]="%*",
["+"]="%+",
["-"]="%-",
["?"]="%?",
}
c_alpha=known_base64_alphabets[alpha]
if c_alpha == nil then
c_alpha={ _alpha=alpha, _term=term }
end
assert( #c_alpha._alpha == 64, "The alphabet ~must~ be 64 unique values." )
assert( #c_alpha._term <= 1, "Specify zero or one termination character.")
b64d={} -- Decode table alpha -> right shifted int values
b64e={} -- Encode table 0-63 (6 bits) -> char table
local s=""
for i = 1,64 do
local byte = c_alpha._alpha:byte(i)
local str = string.char(byte)
b64e[i-1]=byte
assert( b64d[byte] == nil, "Duplicate value '"..str.."'" )
b64d[byte]=i-1
s=s..str
end
local ext --Alias for extraction routine that avoids extra table lookups
if bit32 then
ext = bit32.extract -- slight speed, vast visual (IMO)
elseif bit then
local band = bit.band
local rshift = bit.rshift
ext =
function(n, field, width)
width = width or 1
return band(rshift(n, field), 2^width-1)
end
else
error("Neither Lua 5.2 bit32 nor LuaJit bit library found!")
end
-- preload encode lookup tables
b64e_a = {}
b64e_a2 = {}
b64e_b1 = {}
b64e_b2 = {}
b64e_c1 = {}
b64e_c = {}
for f = 0,255 do
b64e_a [f]=b64e[ext(f,2,6)]
b64e_a2 [f]=ext(f,0,2)*16
b64e_b1 [f]=ext(f,4,4)
b64e_b2 [f]=ext(f,0,4)*4
b64e_c1 [f]=ext(f,6,2)
b64e_c [f]=b64e[ext(f,0,6)]
end
-- preload decode lookup tables
b64d_a1 = {}
b64d_a2 = {}
b64d_b1 = {}
b64d_b2 = {}
b64d_c1 = {}
b64d_z = b64e[0]
for k,v in pairs(b64d) do
-- Each comment shows the rough C expression that would be used to
-- generate the returned triple.
--
b64d_a1 [k] = v*4 -- ([b1] ) << 2
b64d_a2 [k] = math.floor( v / 16 ) -- ([b2] & 0x30) >> 4
b64d_b1 [k] = ext( v, 0, 4 ) * 16 -- ([b2] & 0x0F) << 4
b64d_b2 [k] = math.floor( v / 4 ) -- ([b3] & 0x3c) >> 2
b64d_c1 [k] = ext( v, 0, 2 ) * 64 -- ([b3] & 0x03) << 6
end
if c_alpha._term ~= "" then
tail_padd64[1]=string.char(c_alpha._term:byte(),c_alpha._term:byte())
tail_padd64[2]=string.char(c_alpha._term:byte())
else
tail_padd64[1]=""
tail_padd64[2]=""
end
local esc_term
if magic[c_alpha._term] ~= nil then
esc_term=c_alpha._term:gsub(magic[c_alpha._term],function (s) return magic[s] end)
elseif c_alpha._term == "%" then
esc_term = "%%"
else
esc_term=c_alpha._term
end
if not c_alpha._strip then
local p=s:gsub("%%",function (s) return "__unique__" end)
for k,v in pairs(magic)
do
p=p:gsub(v,function (s) return magic[s] end )
end
local mr=p:gsub("__unique__",function() return "%%" end)
c_alpha._strip = string.format("[^%s%s]",mr,esc_term)
end
assert( c_alpha._strip )
pattern_strip = c_alpha._strip
local c =0 for i in pairs(b64d) do c=c+1 end
assert( c_alpha._alpha == s, "Integrity error." )
assert( c == 64, "The alphabet must be 64 unique values." )
if esc_term ~= "" then
assert( not c_alpha._alpha:find(esc_term), "Tail characters must not exist in alphabet." )
end
if known_base64_alphabets[alpha] == nil then
known_base64_alphabets[alpha]=c_alpha
end
end
return c_alpha._alpha,c_alpha._term
end
--------------------------------------------------------------------------------
-- encode64
--
-- Entry point mode selector.
--
--
local function encode64(i,o)
local method
if o ~= nil and io.type(o) == "file" then
local file_out = o
o = function(s) file_out:write(s) end
end
if type(i) == "string" then
if type(o) == "function" then
method = encode64_with_predicate
else
assert( o == nil, "unsupported request")
method = encode64_tostring
end
elseif io.type(i) == "file" then
assert( type(o) == "function", "file source requires output predicate")
i = encode64_io_iterator(i)
method = encode64_with_ii
else
assert( false, "unsupported mode" )
end
return method(i,o)
end
--------------------------------------------------------------------------------
-- decode64
--
-- Entry point mode selector.
--
--
local function decode64(i,o)
local method
if o ~= nil and io.type(o) == "file" then
local file_out = o
o = function(s) file_out:write(s) end
end
if type(i) == "string" then
if type(o) == "function" then
method = decode64_with_predicate
else
assert( o == nil, "unsupported request")
method = decode64_tostring
end
elseif io.type(i) == "file" then
assert( type(o) == "function", "file source requires output predicate")
i = decode64_io_iterator(i)
method = decode64_with_ii
else
assert( false, "unsupported mode" )
end
return method(i,o)
end
set_and_get_alphabet("base64")
--[[**************************************************************************]]
--[[****************************** Module **********************************]]
--[[**************************************************************************]]
function p.base64(frame)
local args
local data
local encode
if frame == mw.getCurrentFrame() then
args = frame.args
else
args = frame
end
data = args.data
encode = args.encode
yesno(encode, false)
if (encode == true) then
base64.encode(data)
return
{
encode = encode64,
decode = decode64,
alpha = set_and_get_alphabet,
}
else
base64.decode(data)
return
{
encode = encode64,
decode = decode64,
alpha = set_and_get_alphabet,
}
end
end
return p