import sys
from os import access, R_OK
from os.path import isfile
from collections import defaultdict
import re
from smem_obj import SMEM_Obj, ObjType

def clean_smem_string_token(s):
    return str(s).replace("|", "")

def removeComments(s):
    """Trim any '#' comment content from the given string."""
    ind = str(s).find('#')
    if ind == -1:
        return s
    return s[:ind]

def read_tokens_from_lines(file):
    retval = []          # A list of all tokens from the "smem --add {}" contents.
    inSmemAdd = False    # Whether the read is inside an "smem --add {}" command
    try:
        for line in file:
            sline = line.lstrip()
            # Skip comment lines
            if sline.startswith('#'):
                continue
            # Check whether we're in an smem --add command
            if not inSmemAdd:
                if sline.startswith("smem --add {"):
                    inSmemAdd = True
                    # Only add from this line any content after the opening '{'
                    sline = sline[12:]
                else:
                    continue
            # Get the tokens: split on whitespace, pipe-quoted strings, parentheses,
            # and braces; only the whitespace and the pipe quote characters are
            # dropped from the output.
            regex_pattern = r'[\s+]|\|(.+?)\||(?<=\))|(?<=\()|(?<=\{)|(?<=\})'
            tokens = re.split(regex_pattern, removeComments(sline).rstrip())
            # re.split leaves None for unmatched groups and '' between adjacent delimiters
            tokens = [t for t in tokens if t not in (None, '')]
            # Check for the closing brace of the smem --add block
            try:
                ind = tokens.index('}')
                # If no exception was raised, it was found: keep only what precedes it
                tokens = tokens[:ind]
                inSmemAdd = False
            except ValueError:
                pass
            # Add this line's tokens to the return list
            if len(tokens) > 0:
                retval.extend(tokens)
    except Exception as e:
        print("ERROR extracting tokens from the given file: " + str(e), file=sys.stderr)
        return None
    return retval
"""
This method scans a file that holds an 'smem --add{}' command and returns the relevant tokens from that file
"""
def get_smem_tokens_from_local_file(filename):
# Error check for reading the file
if not isfile(filename):
print("ERROR in get_smem_tokens_from_local_file(): File does not exist: '"+str(filename)+"'.", file=sys.stderr)
return None
if not access(filename, R_OK):
print("ERROR in get_smem_tokens_from_local_file(): File is not readable: '"+str(filename)+"'.", file=sys.stderr)
return None
# Get the file content
with open(filename) as file:
retval = read_tokens_from_lines(file)
# All done
return retval
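
# A hypothetical example of the kind of file this function expects: an
# "smem --add { ... }" command whose body lists objects as "(<var> ^attr value ...)"
# elements. The attribute names, variables, and pipe-quoted string below are
# illustrative only, not taken from a real project file.
#
#   smem --add {
#       (<c> ^context-root true ^name |example context|)
#       (<i> ^parent <c> ^value 5)
#   }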

class SMEM_Parser:
    def __init__(self):
        # Initialize the data structures needed to parse the smem file
        self.smem_var_obj_map = defaultdict(SMEM_Obj)

    def parse_file(self, smem_tokens):
        # Iterate through the tokens
        current_obj = None
        current_attr = None
        isNextWMEId = False
        for token in smem_tokens:
            # Skip empty tokens
            if len(token) == 0:
                continue
            # Get WME ID
            if isNextWMEId:
                current_obj = self.smem_var_obj_map[token]
                current_obj.set_id_var(token)
                isNextWMEId = False
                continue
            # Get start of obj
            if token == '(':
                isNextWMEId = True
                continue
            # Get end of obj
            if token == ')':
                current_obj = None
                current_attr = None
                continue
            # Get attributes
            if token.startswith('^'):
                current_attr = token
                continue
            # Get values
            if current_obj is not None and current_attr is not None:
                # Add the WME to the current object
                if token.startswith('<'):
                    # The value is a variable: reuse (or create) its object
                    token_val = self.smem_var_obj_map[token]
                    token_val.set_id_var(token)
                else:
                    token_val = clean_smem_string_token(token)
                current_obj.add_wme(current_attr, token_val)
            else:
                print("ERROR: Unexpected token '" + token + "'.", file=sys.stderr)
                return

    def get_context_root(self):
        for var, obj in self.smem_var_obj_map.items():
            for (attr, val) in obj.wme_list:
                if attr == "^context-root":
                    obj.obj_type = ObjType.CONTEXT
                    return obj
        return None
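
# A minimal usage sketch, not part of the original module: it reads an smem file
# named on the command line, parses its tokens, and reports whether a
# ^context-root object was found. The default filename "smem_file.soar" is an
# assumption for illustration only.
if __name__ == "__main__":
    smem_filename = sys.argv[1] if len(sys.argv) > 1 else "smem_file.soar"
    smem_tokens = get_smem_tokens_from_local_file(smem_filename)
    if smem_tokens is None:
        sys.exit(1)
    parser = SMEM_Parser()
    parser.parse_file(smem_tokens)
    root = parser.get_context_root()
    if root is not None:
        print("Found a ^context-root object with " + str(len(root.wme_list)) + " WMEs.")
    else:
        print("No ^context-root object found in '" + smem_filename + "'.")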