(* ETH Oberon, Copyright 1990-2006 Computer Systems Institute, ETH Zurich, CH-8092 Zurich.
Desktops.OpenDoc http://www.oberon.ethz.ch/ and follow the "license agreement" link. *)
MODULE RFC3986 IN Oberon; (** portable *)
(* Desktops.OpenDoc http://www.apps.ietf.org/rfc/rfc3986.html .
https://en.wikipedia.org/wiki/Percent-encoding
Based upon procedures in HyperDocs.Mod of ETH Oberon / PC Native 05.01.2003.
The source and target parameters of exported procedures are strings.
A source string lacking a 0X string termination is rejected. ple 2005-09-07*)
IMPORT SYSTEM, Texts, Oberon, Strings;
CONST
hexdigits = "0123456789ABCDEF";
VAR
errors*: SHORTINT; (* Error counter *)
W: Texts.Writer;
hexdigitsarray: ARRAY 17 OF CHAR;
PROCEDURE Log(ch: CHAR);
BEGIN Texts.Write(W, ch) END Log;
PROCEDURE LogLn;
BEGIN Texts.WriteLn(W) END LogLn;
(* Precondition = string ends with 0X *)
PROCEDURE LogString(s: ARRAY OF CHAR);
BEGIN Texts.WriteString(W, s) END LogString;
(* Precondition: s contains characters and possibly 0X.
If 0X is present, stop the output there. *)
PROCEDURE LogChars(s: ARRAY OF CHAR);
VAR i: LONGINT;
BEGIN
i := 0;
WHILE (i < LEN(s)) & (s[i] # 0X) DO Texts.Write(W, s[i]); INC(i) END;
END LogChars;
(* PROCEDURE LogInt(x, n: LONGINT);
BEGIN Texts.WriteInt(W, x, n) END LogInt; *)
PROCEDURE LogAppend;
BEGIN Texts.Append(Oberon.Log, W.buf) END LogAppend;
PROCEDURE HexVal(ch: CHAR): INTEGER;
BEGIN
(* LogString("RFC3986.HexVal: invoked on ch = "); Log(22X); Log(ch);
Log(22X); LogString("."); LogLn; *)
(* We might hope that an application of CAP would remove the
need for the case of ch in a..z but CAP does not check the parameter.
Thus CAP("A") and CAP("2") shift characters which should not be
shifted. *)
(* ch := CAP(ch); *)
(* LogString("CAP(ch) = "); Log(HexDigit(ORD(ch) DIV 16));
Log(HexDigit(ORD(ch) MOD 16)); LogString("X."); LogLn; *)
IF (ch >= "0") & (ch <= "9") THEN
RETURN ORD(ch)-ORD("0")
ELSIF (ch >= "A") & (ch <= "F") THEN
RETURN ORD(ch)-ORD("A")+10
ELSIF (ch >= "a") & (ch <= "f") THEN
RETURN ORD(ch)-ORD("a")+10
ELSE
LogString("RFC3986.HexVal: invoked on illegal character."); LogLn;
RETURN 0
END
END HexVal;
PROCEDURE HexDigit(i: INTEGER): CHAR;
BEGIN
RETURN hexdigitsarray[i];
(* IF i < 10 THEN
RETURN CHR(i+ORD("0"))
ELSE
RETURN CHR(i-10+ORD("A"))
END *)
END HexDigit;
(** Escape codes are literalized or contracted.
E.g. "Hello%20World" becomes "Hello World". *)
PROCEDURE Decode*(VAR source, target: ARRAY OF CHAR);
VAR
is, it, slen, tlen: LONGINT;
ch: CHAR;
BEGIN
errors := 0;
is := 0; it := 0; slen := LEN(source); tlen := LEN(target);
IF SYSTEM.ADR(source[0]) = SYSTEM.ADR(target[0]) THEN
(* Aiming to put the literal version where the source exists. The literal
version is never larger than the escaped version. Array overflow is
impossible but check for presence of 0X. *)
LOOP
IF is = slen THEN (* 0X absent. *)
INC(errors);
LogString("RFC3986.Decode: 0X absent from "); LogLn;
LogString("source character array. Not literalizing."); LogLn;
RETURN
ELSIF source[is] = 0X THEN EXIT
ELSE INC(is);
END
END;
(* Now literalize. *)
is := 0;
LOOP
ch := source[is];
IF ch = 0X THEN target[it] := 0X; EXIT
ELSIF (ch = "%") & ((is+2) < slen) THEN
IF Strings.IsHexDigit(source[is+1]) & Strings.IsHexDigit(source[is+2])
THEN (* have a valid encoding such as %3d *)
target[it] := CHR(16*HexVal(source[is+1])+HexVal(source[is+2]));
INC(is); INC(is)
ELSE target[it] := ch; (* = "%" *)
END
ELSE target[it] := ch (* <> "%" *)
END;
INC(is); INC(it)
END
ELSE (* Source and target are distinct. Count characters to check
whether the literalized string will fit in the target array. *)
LOOP
IF is = slen THEN (* 0X absent. *)
INC(errors);
LogString("RFC3986.Decode: 0X absent from "); LogLn;
LogString("source character array. Not literalizing."); LogLn;
RETURN
ELSIF source[is] = 0X THEN EXIT
ELSIF (source[is] = "%") & ((is+2) < slen) THEN
IF Strings.IsHexDigit(source[is+1]) & Strings.IsHexDigit(source[is+2])
THEN (* have a valid encoding such as %3d *) INC(is, 2);
(* ELSE in all other cases, source[is..(is+2)] = "%GG" for example,
source[is] = "%" is left as literal. *)
END
END;
INC(is); INC(it)
END;
(* Index it now locates the terminal character of the hypothetical
literalized string. *)
IF it >= tlen THEN (* literalized string is too long for the target array. *)
INC(errors);
LogString("RFC3986.Decode: literalized string "); LogLn;
LogString("will be too long for the target "); LogLn;
LogString("array. Not literalizing."); LogLn;
RETURN
END;
is := 0; it := 0;
LOOP
ch := source[is];
IF (ch = "%") & ((is+2) < slen) THEN
IF Strings.IsHexDigit(source[is+1]) & Strings.IsHexDigit(source[is+2])
THEN (* have a valid encoding such as %3d *)
target[it] := CHR(16*HexVal(source[is+1])+HexVal(source[is+2]));
INC(is, 2)
ELSE target[it] := ch (* = "%" *)
END
ELSE target[it] := ch (* <> "%" *)
END;
IF target[it] = 0X THEN EXIT END;
INC(is); INC(it)
END
END
END Decode;
(** All special characters in source are escaped according to RFC 2396. E.g.
"a+b" becomes "a%2Bb". Special characters are: 1X .. 20X, "+", "&", "=",
"?", "%", "$", ";". "#", ":" & special.
Escaping a character makes the string longer. Overflow of the character
array is avoided.
Two procedures Encode1 and Encode2 follow. This Encode is a wrapper which can
invoke either of them according to choice of the user. *)
PROCEDURE Encode*(VAR source, target: ARRAY OF CHAR; special: CHAR);
PROCEDURE RequiresEscape(ch:CHAR):BOOLEAN;
BEGIN
IF (ch = 0X)
THEN
RETURN(FALSE)
ELSIF (ch <= 020X) OR (ch = "+") OR (ch = "&") OR (ch = "=") OR (ch = "?")
OR (ch = "%") OR (ch = "$") OR (ch = ";") OR (ch = "#") OR (ch = ":")
OR (ch = special)
THEN
RETURN(TRUE)
ELSE
RETURN(FALSE)
END
END RequiresEscape;
(** The source and target parameters can refer to one or two
actual parameters. With one actual parameter, the string is escaped
"in place". With two actual parameters, the source parameter is untouched. *)
PROCEDURE Encode1(VAR source, target: ARRAY OF CHAR; special: CHAR);
VAR
is, it, slen, tlen: LONGINT;
ch: CHAR;
BEGIN
errors := 0;
is := 0; it := 0; slen := LEN(source); tlen := LEN(target);
LOOP
IF is = slen THEN (* 0X absent. *)
DEC(is); DEC(it); INC(errors);
LogString("RFC3986.Encode.Encode1: 0X absent from "); LogLn;
LogString("character array. Not escaping."); LogLn;
RETURN
ELSIF source[is] = 0X THEN EXIT
ELSIF RequiresEscape(source[is]) THEN INC(it, 3); INC(is)
ELSE INC(it); INC(is)
END
END;
(* is now contains the index of the last character of the string;
"it" contains the index of the last character in the escaped version
and may be >= tlen. The source string ends with 0X. *)
IF it >= tlen THEN (* The escaped version is too long for the target array. *)
INC(errors);
LogString("RFC3986.Encode.Encode1: Escaped version of string "); LogLn;
Log(22X); LogString(source); Log(22X); LogLn;
LogString("will be too long for the array. Not escaping."); LogLn;
RETURN
END;
IF SYSTEM.ADR(source[0]) = SYSTEM.ADR(target[0]) THEN
(* Aiming to put the escaped version where the source exists.
Put the translated characters into the tail end of the array.
After translation is complete, shift the string up to the front of the array.
This avoids shifting the downstream characters for each character which
needs escaping. *)
it := tlen-1;
IF source[is] = 0X THEN target[it] := 0X; DEC(is); DEC(it)
ELSE
INC(errors);
LogString("RFC3986.Encode1: index of terminus of source string"); LogLn;
LogString("not located properly. Not escaping."); LogLn;
RETURN
END;
(* Proceed to check characters from the terminus towards the origin of
the array; escape as required. *)
LOOP
IF is = -1 THEN (* finished source array *) EXIT END;
ch := source[is];
IF RequiresEscape(ch) THEN
target[it] := HexDigit(ORD(ch) MOD 16); DEC(it);
target[it] := HexDigit(ORD(ch) DIV 16); DEC(it);
target[it] := "%"; DEC(it)
ELSE (* just copy the character *)
target[it] := ch; DEC(it)
END;
DEC(is)
END;
(* Encoding finished; shift the string to the origin of the array. *)
is := 0; INC(it);
LOOP
IF it = tlen THEN EXIT END;
target[is] := target[it];
IF target[it] = 0X THEN EXIT END;
INC(is); INC(it);
END
ELSE (* source and target are in distinct arrays. In this case work
from the head toward the tail. *)
is := 0; it := 0;
LOOP
IF is = slen THEN (* end of source array *) RETURN END;
(* check the character and translate if required; *)
IF RequiresEscape(source[is]) THEN
target[it] := "%"; INC(it);
target[it] := HexDigit(ORD(source[is]) DIV 16); INC(it);
target[it] := HexDigit(ORD(source[is]) MOD 16);INC(it)
ELSE (* just copy the character; *)
target[it] := source[is]; INC(it);
END;
IF source[is] = 0X THEN (* End of string; finished. *) EXIT END;
INC(is)
END; (* Loop over source characters. *)
END; (* Cases of single and distinct arrays for source and target. *)
RETURN
END Encode1;
(** The source and target parameters can refer to one or two
actual parameters. In this procedure an intermediate array is created to put
the escaped array into. After escaping, the intermediate array is copied to
the target. *)
PROCEDURE Encode2(VAR source, target: ARRAY OF CHAR; special: CHAR);
VAR
is, it, slen, tlen: LONGINT;
(* ch: CHAR; *)
intermediate: POINTER TO ARRAY OF CHAR;
BEGIN
errors := 0;
is := 0; it := 0; slen := LEN(source); tlen := LEN(target);
LOOP
IF is = slen THEN (* 0X absent. *)
DEC(is); DEC(it); INC(errors);
LogString("RFC3986.Encode.Encode2: 0X absent from character array."); LogLn;
RETURN
ELSIF source[is] = 0X THEN EXIT
ELSIF RequiresEscape(source[is]) THEN INC(it, 3); INC(is)
ELSE INC(it); INC(is)
END
END;
(* is now contains the index of the last character of the string;
"it" contains the index of the last character in the escaped version
and may be >= tlen. The source string ends with 0X. *)
IF it >= tlen THEN (* The escaped version is too long for the target array. *)
INC(errors);
LogString("RFC3986.Encode.Encode2: Escaped version of string "); LogLn;
Log(22X); LogString(source); Log(22X); LogLn;
LogString("will be too large for the array. Not escaping."); LogLn;
RETURN
END;
NEW(intermediate, it+1);
is := 0; it := 0;
LOOP
IF is = slen THEN (* end of source array *) EXIT END;
(* check the character and translate if required; *)
IF RequiresEscape(source[is]) THEN
intermediate[it] := "%"; INC(it);
intermediate[it] := HexDigit(ORD(source[is]) DIV 16); INC(it);
intermediate[it] := HexDigit(ORD(source[is]) MOD 16);INC(it)
ELSE
intermediate[it] := source[is]; INC(it);
(* LogChars("intermediate = "); LogChars(intermediate^); LogLn; *)
END;
IF source[is] = 0X THEN (* end of string *) EXIT END;
INC(is);
END; (* Loop over characters. *)
(* The escaped string is now in intermediate^. Copy intermediate to
target. *)
(* (LogString("intermediate = ");
Log(22X); LogChars(intermediate^); Log(22X); LogLn; *)
it := 0;
LOOP
IF it = tlen THEN
INC(errors);
LogString("RFC3986.Encode.Encode2: 0X not found at end of target string."); LogLn;
EXIT
END;
target[it] := intermediate[it];
IF target[it] = 0X THEN EXIT END;
INC(it)
END
END Encode2;
BEGIN
Encode1(source, target, special)
END Encode;
PROCEDURE Test*;
VAR
source5, intermediate5, target5: ARRAY 5 OF CHAR;
source6, intermediate6 (*, target6*) : ARRAY 6 OF CHAR;
source7, intermediate7, target7: ARRAY 7 OF CHAR;
source8, intermediate8 (*, target8*) : ARRAY 8 OF CHAR;
BEGIN
LogString("RFC3986.Test: begun."); LogLn;
LogLn;
LogString("Test with distinct source and target locations."); LogLn;
intermediate7 := "%GGlah"; (* Anomalous case suggested by Jan Verhoeven. *)
target7 := "------"; LogLn;
LogString("intermediate7 = "); Log(22X); LogChars(intermediate7); Log(22X); LogLn;
LogString("target7 = "); Log(22X); LogChars(target7); Log(22X); LogLn;
LogString("Invoke Decode(intermediate7, target7)."); LogLn;
Decode(intermediate7, target7);
LogString("target7 = "); Log(22X); LogChars(target7); Log(22X); LogLn;
LogLn;
LogAppend;
LogLn;
source5 := "blah";
intermediate5 := "----";
target5 := "----";
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("intermediate5 = "); Log(22X); LogChars(intermediate5); Log(22X); LogLn;
LogString("target5 = "); Log(22X); LogChars(target5); Log(22X); LogLn;
LogString("Invoke Encode(source5, intermediate5, ..."); LogLn;
Encode(source5, intermediate5, " ");
LogString("intermediate5 = "); Log(22X); LogChars(intermediate5); Log(22X); LogLn;
LogString("Invoke Decode(intermediate5, target5)."); LogLn;
Decode(intermediate5, target5);
LogString("target5 = "); Log(22X); LogChars(target5); Log(22X); LogLn;
LogLn;
LogAppend;
source5 := "blah"; source5[4] := 73X;
intermediate5 := "----"; intermediate5[4] := 2DX;
target5 := "----";
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("intermediate5 = "); Log(22X); LogChars(intermediate5); Log(22X); LogLn;
LogString("target5 = "); Log(22X); LogChars(target5); Log(22X); LogLn;
LogString("Invoke Encode(source5, intermediate5, ..."); LogLn;
Encode(source5, intermediate5, " ");
LogString("intermediate5 = "); Log(22X); LogChars(intermediate5); Log(22X); LogLn;
LogString("Invoke Decode.(intermediate5, target5)"); LogLn;
Decode(intermediate5, target5);
LogString("target5 = "); Log(22X); LogChars(target5); Log(22X); LogLn;
LogLn;
LogAppend;
source5 := "+lah";
intermediate7 := "------";
target5 := "----";
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("intermediate7 = "); Log(22X); LogChars(intermediate7); Log(22X); LogLn;
LogString("target5 = "); Log(22X); LogChars(target5); Log(22X); LogLn;
LogString("Invoke Encode(source5, intermediate7, ..."); LogLn;
Encode(source5, intermediate7, " ");
LogString("intermediate7 = "); Log(22X); LogChars(intermediate7); Log(22X); LogLn;
LogString("Invoke Decode(intermediate7, target5)."); LogLn;
Decode(intermediate7, target5);
LogString("target5 = "); Log(22X); LogChars(target5); Log(22X); LogLn;
LogLn;
LogAppend;
source5 := "bla+";
intermediate6 := "-----";
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("intermediate6 = "); Log(22X); LogChars(intermediate6); Log(22X); LogLn;
LogString("Invoke Encode(source5, intermediate6, ..."); LogLn;
Encode(source5, intermediate6, " ");
LogString("intermediate6 = "); Log(22X); LogChars(intermediate6); Log(22X); LogLn;
LogLn;
LogAppend;
source5 := "bla+";
intermediate7 := "------";
target5 := "----";
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("intermediate7 = "); Log(22X); LogChars(intermediate7); Log(22X); LogLn;
LogString("target5 = "); Log(22X); LogChars(target5); Log(22X); LogLn;
LogString("Invoke Encode(source5, intermediate7, ..."); LogLn;
Encode(source5, intermediate7, " ");
LogString("intermediate7 = "); Log(22X); LogChars(intermediate7); Log(22X);LogLn;
LogString("Invoke Decode(intermediate7, target5)."); LogLn;
Decode(intermediate7, target5);
LogString("target5 = "); Log(22X); LogChars(target5); Log(22X); LogLn;
LogLn;
LogAppend;
source8 := "bla+";
intermediate8 := "-------";
target5 := "----";
LogString("source8 = "); Log(22X); LogChars(source8); Log(22X); LogLn;
LogString("intermediate8 = "); Log(22X); LogChars(intermediate8); Log(22X); LogLn;
LogString("target5 = "); Log(22X); LogChars(target5); Log(22X); LogLn;
LogString("Invoke Encode(source8, intermediate8, ..."); LogLn;
Encode(source8, intermediate8, " ");
LogString("intermediate8 = "); Log(22X); LogChars(intermediate8); Log(22X); LogLn;
LogString("Invoke Decode(intermediate8, target5)."); LogLn;
Decode(intermediate8, target5);
LogString("target5 = "); Log(22X); LogChars(target5); Log(22X); LogLn;
LogLn;
LogAppend;
LogString("Now test with coincident source and target locations."); LogLn;
LogLn;
intermediate7 := "%GGlah"; (* Anomalous case suggested by Jan Verhoeven. *)
LogString("intermediate7 = "); Log(22X); LogChars(intermediate7); Log(22X); LogLn;
LogString("Invoke Decode(intermediate7, intermediate7)."); LogLn;
Decode(intermediate7, intermediate7);
LogString("intermediate7 = "); Log(22X); LogChars(intermediate7); Log(22X); LogLn;
LogLn;
LogAppend;
source5 := "blah";
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("Invoke Encode(source5, source5, ..."); LogLn;
Encode(source5, source5, " ");
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("Invoke Decode(source5, source5)."); LogLn;
Decode(source5, source5);
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogLn;
LogAppend;
source5 := "blah"; source5[4] := 73X;
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("Invoke Encode(source5, source5, ..."); LogLn;
Encode(source5, source5, " ");
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("Invoke Decode(source5, source5)."); LogLn;
Decode(source5, source5);
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogLn;
LogAppend;
source5 := "+lah";
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("Invoke Encode(source5, source5, ..."); LogLn;
Encode(source5, source5, " ");
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("Invoke Decode(source5, source5)."); LogLn;
Decode(source5, source5);
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogLn;
LogAppend;
source6 := "+lah";
LogString("source6 = "); Log(22X); LogChars(source6); Log(22X); LogLn;
LogString("Invoke Encode(source6, source6, ..."); LogLn;
Encode(source6, source6, " ");
LogString("source6 = "); Log(22X); LogChars(source6); Log(22X); LogLn;
LogString("Invoke Decode(source6, source6)."); LogLn;
Decode(source6, source6);
LogString("source6 = "); Log(22X); LogChars(source6); Log(22X); LogLn;
LogLn;
LogAppend;
source7 := "+lah";
LogString("source7 = "); Log(22X); LogChars(source7); Log(22X); LogLn;
LogString("Invoke Encode(source7, source7, ..."); LogLn;
Encode(source7, source7, " ");
LogString("source7 = "); Log(22X); LogChars(source7); Log(22X); LogLn;
LogAppend;
LogString("Invoke Decode(source7, source7)."); LogLn;
LogAppend;
Decode(source7, source7);
LogString("source7 = "); Log(22X); LogChars(source7); Log(22X); LogLn;
LogLn;
LogAppend;
source5 := "bla+";
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("Invoke Encode(source5, source5, ..."); LogLn;
Encode(source5, source5, " ");
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogString("Invoke Decode(source5, source5)."); LogLn;
Decode(source5, source5);
LogString("source5 = "); Log(22X); LogChars(source5); Log(22X); LogLn;
LogLn;
LogAppend;
source6 := "bla+";
LogString("source6 = "); Log(22X); LogChars(source6); Log(22X); LogLn;
LogString("Invoke Encode(source6, source6, ..."); LogLn;
Encode(source6, source6, " ");
LogString("source6 = "); Log(22X); LogChars(source6); Log(22X); LogLn;
LogString("Invoke Decode(source6, source6)."); LogLn;
Decode(source6, source6);
LogString("source6 = "); Log(22X); LogChars(source6); Log(22X); LogLn;
LogLn;
LogAppend;
source7 := "bla+";
LogString("source7 = "); Log(22X); LogChars(source7); Log(22X); LogLn;
LogString("Invoke Encode(source7, source7, ..."); LogLn;
Encode(source7, source7, " ");
LogString("source7 = "); Log(22X); LogChars(source7); Log(22X); LogLn;
LogString("Invoke Decode(source7, source7)."); LogLn;
Decode(source7, source7);
LogString("source7 = "); Log(22X); LogChars(source7); Log(22X); LogLn;
LogLn;
LogAppend;
source8 := "bla+";
LogString("source8 = "); Log(22X); LogChars(source8); Log(22X); LogLn;
LogString("Invoke Encode(source8, source8, ..."); LogLn;
Encode(source8, source8, " ");
LogString("source8 = "); Log(22X); LogChars(source8); Log(22X); LogLn;
LogString("Invoke Decode(source8, source8)."); LogLn;
Decode(source8, source8);
LogString("source8 = "); Log(22X); LogChars(source8); Log(22X); LogLn;
LogLn;
LogAppend; (**)
LogString("RFC3986.Test: completed."); LogLn;
LogAppend
END Test;
BEGIN
Texts.OpenWriter(W);
hexdigitsarray := hexdigits;
END RFC3986.
RFC3986.Test