File size: 5,776 Bytes
04b0dd9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
"Soros interpreter (see http://numbertext.org)"
from __future__ import unicode_literals
from __future__ import print_function
import re
import sys
def run(program, data, lang):
    """Compile the Soros source *program* for *lang* and apply it to *data*.

    One-shot convenience wrapper: the program is compiled on every call, so
    callers converting many inputs should keep the object returned by
    ``compile()`` and call its ``run()`` method instead.
    """
    # NOTE: ``compile`` is this module's function, which shadows the builtin.
    interpreter = compile(program, lang)
    return interpreter.run(data)
def compile(program, lang):
    """Return a reusable interpreter object for the Soros source *program*.

    The name intentionally shadows the builtin ``compile`` inside this module;
    it is part of the public interface and must not be renamed.
    """
    interpreter = _Soros(program, lang)
    return interpreter
# conversion function
def _tr(text, chars, chars2, delim):
    """Replace every ``delim + chars[k]`` in *text* with ``chars2[k]``.

    The replacements are applied one after another in the order of *chars*,
    so a later pair sees the output of the earlier ones.  *chars2* must be at
    least as long as *chars*.
    """
    for position, source_char in enumerate(chars):
        text = text.replace(delim + source_char, chars2[position])
    return text
# Soros metacharacters, by position: \ " ; # $ ( ) | [ ]
# (the interpreter addresses them through slices such as _m[:4] and _m[4:8])
_m = '\\";#$()|[]'
# Stand-in characters from the Unicode private use area, one per position of _m
_c = u"\uE000\uE001\uE002\uE003\uE004\uE005\uE006\uE007\uE008\uE009"
# \uE003, i.e. the stand-in that "|" (_m[7]) receives under the _m[4:8] -> _c[:4] mapping
_pipe = _c[3]
# separator prefix = \uE00A
# pattern to recognize function calls in the replacement string
# The literal is written with backslash-escaped metacharacters; _tr() rewrites
# \$, \(, \) and \| into their private-use stand-ins before compilation, so the
# compiled pattern looks for the stand-ins, not for literal "$", "(", ")", "|".
# Group 1 is the innermost call together with any directly adjacent pipe
# stand-ins; group 2 is the text between its parentheses (which cannot contain
# a parenthesis stand-in).  re.X makes the whitespace and the "#" comments
# inside the literal insignificant.
_func = re.compile(_tr(r"""(?:\|?(?:\$\()+)? # optional nested calls
(\|?\$\(([^\(\)]*)\)\|?) # inner call (2 subgroups)
(?:\)+\|?)?""", # optional nested calls
_m[4:8], _c[:4], "\\"), re.X) # \$, \(, \), \| -> \uE000..\uE003
class _Soros:
    """A Soros program compiled into an ordered list of rewrite rules.

    ``self.lines`` holds one ``[regex, replacement, begin_only, end_only]``
    entry per parsed source line, in source order.  ``begin_only`` and
    ``end_only`` record whether the source pattern started with ``^`` or
    ended with ``$``; ``_run`` skips such rules when it is called with
    ``begin`` or ``end`` set to False.
    """

    def __init__(self, prg, lang):
        """Parse and compile the Soros source text *prg*.

        Args:
            prg: Soros program text.  Statements are separated by newlines
                or ``;`` and ``#`` starts a comment.  If the text does not
                contain ``__numbertext__``, that marker is prepended so the
                default rules are always included.
            lang: Language code.  ``_`` is replaced by ``-``; lines whose
                comment carries a matching ``[:code:]`` tag are enabled,
                lines tagged with any other code stay disabled.

        Raises:
            Exception: whatever ``re.compile`` raises for a malformed rule
                pattern; the offending pattern is printed to stderr first.
        """
        self.lines = []
        if "__numbertext__" not in prg:
            prg = "__numbertext__;" + prg
        # default left zero deletion
        # and separator function (no separation, if subcall returns with empty string)
        prg = prg.replace("__numbertext__", u"""0+(0|[1-9]\\d*) $1
\"([a-z][-a-z]* )0+(0|[1-9]\\d*)\" $(\\1\\2)
\"\uE00A(.*)\uE00A(.+)\uE00A(.*)\" \\1\\2\\3
\"\uE00A.*\uE00A\uE00A.*\"
""")
        # \\, \", \;, \# -> \uE000..\uE003
        prg = _tr(prg, _m[:4], _c[:4], "\\")
        # switch off all country-dependent lines, and switch on the requested ones
        prg = re.sub(
            r"(^|[\n;])([^\n;#]*#[^\n]*[\[]:[^\n:\]]*:][^\n]*)", r"\1#\2", prg)
        # The language code is escaped so that it is always matched literally.
        prg = re.sub(
            r"(^|[\n;])#([^\n;#]*#[^\n]*[\[]:"
            + re.escape(lang.replace("_", "-"))
            + r":][^\n]*)",
            r"\1\2", prg)
        # Splits a statement into pattern (optionally double-quoted) and
        # replacement.  Raw string: "\s" is not a valid str escape sequence.
        matchline = re.compile(r'^\s*("[^"]*"|[^\s]*)\s*(.*[^\s])?\s*$')
        prefix = ""
        # Strip comments, then treat newlines and ";" alike as separators.
        for s in re.sub("(#[^\n]*)?(\n|$)", ";", prg).split(";"):
            # "== name ==" sets the prefix for all following rules
            # ("== ==" resets it to the empty string).
            macro = re.match("== *(.*[^ ]?) ==", s)
            if macro is not None:
                prefix = macro.group(1)
                continue
            m = matchline.match(s)
            if prefix != "" and s != "" and m is not None:
                # Rebuild the statement with the prefix inserted in front of
                # the pattern, keeping a leading "^" anchor in first position.
                s = m.group(1).strip("\"")
                space = " " if s != "" else ""
                caret = ""
                if s[0:1] == "^":
                    s = s[1:]
                    caret = "^"
                s2 = m.group(2) if m.group(2) is not None else ""
                s = "\"" + caret + prefix + space + s + "\" " + s2
                m = matchline.match(s)
            if m is None:
                continue
            # pattern: stand-ins -> \\, ", ;, #
            s = _tr(m.group(1).strip("\""), _c[1:4], _m[1:4], "") \
                .replace(_c[_m.find("\\")], "\\\\")
            if m.group(2) is not None:
                s2 = m.group(2).strip("\"")
            else:
                s2 = ""
            # \$, \(, \), \|, \[, \] -> \uE004..\uE009
            s2 = _tr(s2, _m[4:], _c[4:], "\\")
            # call inner separator: [ ... $1 ... ] -> $(\uE00A ... \uE00A$1\uE00A ... )
            s2 = re.sub(r"[\[]\$(\d\d?|\([^\)]+\))",
                        u"$(\uE00A\uE00A|$\\1\uE00A", s2)
            s2 = re.sub(r"[\[]([^\$[\\]*)\$(\d\d?|\([^\)]+\))",
                        u"$(\uE00A\\1\uE00A$\\2\uE00A", s2)
            # add "|" in terminating position
            s2 = re.sub(r"\uE00A]$", "|\uE00A)", s2)
            s2 = re.sub(r"]", ")", s2)
            # $()|$() -> $()||$()
            s2 = re.sub(r"(\$\d|\))\|\$", r"\1||$", s2)
            # \uE000..\uE003-> \, ", ;, #
            s2 = _tr(s2, _c[:4], _m[:4], "")
            # $, (, ), | -> \uE000..\uE003
            s2 = _tr(s2, _m[4:8], _c[:4], "")
            # \uE004..\uE009 -> $, (, ), |, [, ]
            s2 = _tr(s2, _c[4:], _m[4:], "")
            # "$N" (now \uE000 + digit) becomes a call whose argument is
            # group N; afterwards every "\N" is rewritten as "\g<N>".
            s2 = re.sub(r"\uE000(\d)", "\uE000\uE001\\\\g<\\1>\uE002", s2)
            s2 = re.sub(r"\\(\d)", r"\\g<\1>", s2)
            try:
                rule = re.compile("^" + s.lstrip("^").rstrip("$") + "$")
            except Exception:
                print("Error in following regex line: " + s, file=sys.stderr)
                raise
            self.lines.append([rule, s2, s[:1] == "^", s[-1:] == "$"])

    def run(self, data):
        """Convert *data*, treating it as both the beginning and the end of the text."""
        return self._run(data, True, True)

    def _run(self, data, begin, end):
        """Apply the first rule that matches *data* and expand its nested calls.

        Args:
            data: Text to convert.
            begin: When False, rules whose pattern started with ``^`` are skipped.
            end: When False, rules whose pattern ended with ``$`` are skipped.

        Returns:
            The expansion produced by the first applicable rule, with every
            function call in it replaced by the result of a recursive
            ``_run``; ``""`` when no rule applies.

        Raises:
            Exception: whatever ``Match.expand`` raises for a bad replacement
                template; the input is printed to stderr first.
        """
        for regex, template, begin_only, end_only in self.lines:
            if (not begin and begin_only) or (not end and end_only):
                continue
            m = regex.match(data)
            if not m:
                continue
            try:
                s = m.expand(template)
            except Exception:
                print("Error for the following input: " + data, file=sys.stderr)
                raise
            call = _func.search(s)
            while call:
                whole = call.group()
                inner = call.group(1)
                # An adjacent pipe stand-in forces the flag to True; otherwise
                # a call touching the edge of the string inherits the caller's
                # flag, and any other call gets False.
                if inner[0:1] == _pipe or whole[0:1] == _pipe:
                    b = True
                elif call.start() == 0:
                    b = begin
                else:
                    b = False
                if inner[-1:] == _pipe or whole[-1:] == _pipe:
                    e = True
                elif call.end() == len(s):
                    e = end
                else:
                    e = False
                s = (s[:call.start(1)]
                     + self._run(call.group(2), b, e)
                     + s[call.end(1):])
                call = _func.search(s)
            return s
        return ""
|