Check this out then: LOLCODE has also inspired LOLPython!
You can has Python classez.
And here is a LOLPython-to-Python converter. Have fun!
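Here is a quick taste (illustrative, pieced together from the token table in the source below; the real converter also prepends a small generated header). This LOLPython:

VISIBLE 'O HAI WORLD'
cheez CAN HAS CHEEZBURGER
cheez GETZ ANOTHR 4
VISIBLE cheez

comes out roughly as this Python:

print 'O HAI WORLD'
cheez = 1
cheez += 4
print cheez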
#!/usr/bin/env python
# Implementation of the LOLPython language.
# Converts from LOLPython to Python then optionally runs the Python.
# This package depends on PLY -- http://www.dabeaz.com/ply/
# Written by Andrew Dalke <dalke@dalkescientific.com>
# Dalke Scientific Software, LLC
# 1 June 2007, Gothenburg, Sweden
#
# This software is in the public domain. For details see:
# http://creativecommons.org/licenses/publicdomain/
import sys
import keyword
import os
import types
from cStringIO import StringIO
from ply import *
__NAME__ = "lolpython"
__VERSION__ = "1.0"
# Translating LOLPython tokens to Python tokens
# This could be cleaned up. For example, some of
# these tokens could be merged into one.
tokens = (
"NAME", # variable names
"RESERVED", # Used for Python reserved names
"NUMBER", # Integers and floats
"STRING",
"OP", # Like the Python OP
"CLOSE", # Don't really need this..
"COMMENT",
"AUTOCALL", # write t.value then add '('
"INLINE", # write t.value directly
"FUTURE", # for the "I FUTURE CAT WITH" statement
"PRINT", # VISIBLE -> stdout or COMPLAIN -> stderr
"ENDMARKER",
"COLON",
"WS",
"NEWLINE",
)
# Helper functions for building tokens of the given types
def OP(t, value):
t.type = "OP"
t.value = value
return t
def RESERVED(t, value):
t.type = "RESERVED"
t.value = value
return t
def AUTOCALL(t, value):
    t.type = "AUTOCALL"
    t.value = value   # was hard-coded to "tuple", which ignored the argument
    t.lexer.paren_stack.append(")")
    return t
def INLINE(t, value):
t.type = "INLINE"
t.value = value
return t
#####
# ply uses a large regex for token detection, and sre is limited to
# 100 groups. This grammar pushes the limit. I use (?:non-grouping)
# parens to keep the count down.
def t_ASSIGN(t): # cannot be a simple pattern because it must
r'CAN[ ]+HA[SZ]\b' # come before the t_NAME definition
return OP(t, "=")
def t_SINGLE_QUOTE_STRING(t):
r"'([^\\']+|\\'|\\\\)*'" # I think this is right ...
t.type = "STRING"
t.value = t.value[1:-1].decode("string-escape")
return t
def t_DOUBLE_QUOTE_STRING(t):
r'"([^\\"]+|\\"|\\\\)*"'
t.type = "STRING"
t.value = t.value[1:-1].decode("string-escape")
return t
# and LOL quoted strings! They end with /LOL
# No way to have "/LOL" in the string.
def t_LOL_STRING(t):
r"LOL[ ]*((?!/LOL).|\n)*[ ]*/LOL"
t.type = "STRING"
t.value = t.value[3:-4].strip(" ")
return t
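# Example (illustrative): LOL O HAI /LOL becomes the string "O HAI" --
# the slice drops the "LOL" and "/LOL" markers, strip() the padding spaces.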
# Aliases for the same thing - for extra cuteness
def t_LSQUARE(t):
r"(?:SOME|LOOK[ ]AT|LET[ ]+THE)\b"
t.lexer.paren_stack.append(']')
return OP(t, "[")
def t_LPAREN(t):
r"(?:WIT|THEZ)\b"
t.lexer.paren_stack.append(')')
return OP(t, "(")
def t_LBRACE(t):
r"BUCKET\b"
t.lexer.paren_stack.append("}")
return OP(t, "{")
def t_CLOSE(t):
    r"(?:OK(?:!+|\b)|!+)"   # non-grouping parens, per the group-count note above
stack = t.lexer.paren_stack
if t.value.startswith("OK"):
num_closes = len(t.value)-1 # OK -> 1, OK! -> 2, OK!!->3
else:
num_closes = len(t.value) # ! -> 1, !! -> 2
# Which close is this? I use "OK" to match (, [ and {
if len(stack) < num_closes:
raise AssertionError("not enough opens on the stack: line %d"
% (t.lineno,))
t.value = "".join(stack[-num_closes:][::-1])
del stack[-num_closes:]
return t
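# Worked example (illustrative): "WIT x AND y OK" lexes to "(", "x",
# ",", "y", and the OK pops one opener off paren_stack, producing ")".
# Each extra "!" pops one more, so after "SOME WIT x OK!" the stack
# [']', ')'] unwinds innermost-first into ")]".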
def t_EQ(t):
r"KINDA[ ]+LIKE\b"
return OP(t, "==")
def t_NE(t):
r"(?:KINDA[ ]+)?NOT[ ]+LIKE\b"
return OP(t, "!=")
def t_is(t):
r"KINDA[ ]+IS\b"
return RESERVED(t, "is")
def t_GT(t):
r"ATE[ ]+MORE[ ]+CHEEZBURGERS?[ ]+THAN\b"
return OP(t, ">")
def t_LT(t):
r"ATE[ ]+FEWER[ ]+CHEEZBURGERS?[ ]+THAN\b"
return OP(t, "<")
def t_GTE(t):
r"BIG[ ]+LIKE\b"
return OP(t, ">=")
def t_LTE(t):
r"SMALL[ ]+LIKE\b"
return OP(t, "<=")
def t_RETURN(t):
r"U[ ]+TAKE\b"
return RESERVED(t, "return")
def t_yield(t):
r"U[ ]+BORROW\b"
return RESERVED(t, "yield")
def t_ELIF(t):
r"OR[ ]+IZ\b"
return RESERVED(t, "elif")
def t_ELSE(t):
r"(?:(?:I[ ]+GIVE[ ]+UP|IZ[ ]+KEWL|ALL[ ]+DONE)|NOPE)\b"
return RESERVED(t, "else")
def t_COLON(t):
r"\?"
t.value = ":"
return t
def t_FROM(t):
r"IN[ ]+MAI\b"
return RESERVED(t, "from")
def t_EXCEPT(t):
r"O[ ]+NOES\b"
return RESERVED(t, "except")
def t_PLUS(t):
r"ALONG[ ]+WITH\b"
return OP(t, "+")
def t_MINUS(t):
r"TAKE[ ]+AWAY\b"
return OP(t, "-")
def t_PLUS_EQUAL(t):
r"GETZ[ ]+ANOTHR\b"
return OP(t, "+=")
def t_MINUS_EQUAL(t):
r"THROW[SZ]?[ ]+AWAY\b"
return OP(t, "-=")
# PLY tries these function rules in definition order, so the longer
# "SMASHES INTO HAS" pattern must be defined before plain "SMASHES INTO"
# or it would never get a chance to match.
def t_DIV_EQUAL(t):
    r"SMASHES[ ]+INTO[ ]+HAS\b"
    return OP(t, "/=")
def t_TRUEDIV(t):
    r"SMASHES[ ]+NICELY[ ]+INTO\b"
    return OP(t, "//")
def t_DIV(t):
    r"SMASHES[ ]+INTO\b"
    return OP(t, "/")
def t_MUL(t):
r"OF[ ]THOSE\b"
return OP(t, "*")
def t_MUL_EQUAL(t):
r"COPIES[ ]+(?:HIM|HER|IT)SELF[ ]+BY\b"
return OP(t, "*=")
def t_POW(t):
    r"BY[ ]+GRAYSKULL[ ]+POWER\b"   # \b for consistency with the other rules
    return OP(t, "**")
def t_IN(t):
r"IN[ ]+(?:UR|THE|THIS)\b"
return OP(t, "in")
def t_del(t):
r"DO[ ]+NOT[ ]+WANT\b"
return RESERVED(t, "del")
def t_and(t):
r"\&"
return RESERVED(t, "and")
def t_or(t):
r"OR[ ]+MABEE\b"
return RESERVED(t, "or")
def t_pass(t):
r"I[ ]+IZ[ ]+CUTE\b"
return RESERVED(t, "pass")
def t_forever(t):
r"WHILE[ ]+I[ ]+CUTE\b"
return INLINE(t, "while 1")
def t_def(t):
r"SO[ ]+IM[ ]+LIKE\b"
return RESERVED(t, "def")
def t_class(t):
r"ME[ ]+MAKE[ ]\b"
return RESERVED(t, "class")
def t_future(t):
r"I[ ]+FUTURE[ ]+CAT[ ]+WITH\b"
t.type = "FUTURE"
return t
def t_assert(t):
r"SO[ ]+GOOD\b"
return RESERVED(t, "assert")
def t_assert_not(t):
r"AINT[ ]+GOOD\b"
return INLINE(t, "assert not ")
def t_for(t):
r"GIMME[ ]+EACH\b"
return RESERVED(t, "for")
def t_list(t):
r"ALL[ ]+OF\b"
return AUTOCALL(t, "tuple")
RESERVED_VALUES = {
"EASTERBUNNY": ("NUMBER", "0"),
"CHEEZBURGER": ("NUMBER", "1"),
"CHOKOLET": ("NUMBER", "-1"),
"TWIN": ("NUMBER", "2"),
"TWINZ": ("NUMBER", "2"),
"TWINS": ("NUMBER", "2"),
"EVILTWIN": ("NUMBER", "-2"),
"EVILTWINZ": ("NUMBER", "-2"),
"EVILTWINS": ("NUMBER", "-2"),
"ALLFINGERZ": ("NUMBER", "10"),
"TOEZ": ("NUMBER", "-10"),
"ONE": ("NUMBER", "1"),
"ONCE": ("NUMBER", "1"),
"TWO": ("NUMBER", "2"),
"TWICE": ("NUMBER", "2"),
"THR33": ("NUMBER", "3"),
"FOUR": ("NUMBER", "4"),
"FIV": ("NUMBER", "5"),
"SIKS": ("NUMBER", "6"),
"SEVN": ("NUMBER", "7"),
"ATE": ("NUMBER", "8"),
"NINE": ("NUMBER", "9"),
"MEH": ("NAME", "False"),
"YEAH": ("NAME", "True"),
"VISIBLE": ("PRINT", "stdout"),
"COMPLAIN": ("PRINT", "stderr"),
"AND": ("OP", ","),
"BLACKHOLE": ("RESERVED", "ZeroDivisionError"),
"DONOTLIKE": ("AUTOCALL", "AssertionError"),
"ANTI": ("OP", "-"),
"IZ": ("RESERVED", "if"),
"GIMME": ("RESERVED", "import"),
"LIKE": ("RESERVED", "as"),
"OWN": ("OP", "."),
"PLZ": ("RESERVED", "try"),
"HALP": ("RESERVED", "raise"),
"WHATEVER": ("RESERVED", "finally"),
"KTHX": ("RESERVED", "continue"),
"KTHXBYE": ("RESERVED", "break"),
"OVER": ("OP", "/"),
"AINT": ("RESERVED", "not"),
"ME": ("RESERVED", "self"),
"STRING": ("AUTOCALL", "str"),
"NUMBR": ("AUTOCALL", "int"),
"BIGNESS": ("AUTOCALL", "len"),
"NUMBRZ": ("AUTOCALL", "range"),
"ADDED": ("AUTOCALL", ".append"),
"ARGZ": ("INLINE", "_lol_sys.argv"),
"THINGZ": ("INLINE", "()"), # invisible tuple didn't sound right
"THING": ("INLINE", "()"), # sometimes it's better in singular form
"MY": ("INLINE", "self."),
"MYSELF": ("INLINE", "(self)"),
"EVEN": ("INLINE", "% 2 == 0"),
"ODD": ("INLINE", "% 2 == 1"),
"WIF": ("RESERVED", "with"),
}
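# Put together (illustrative): "VISIBLE BIGNESS ARGZ!" converts to
# roughly "print len(_lol_sys.argv)" -- VISIBLE selects stdout, BIGNESS
# is an AUTOCALL that emits "len(", ARGZ is inlined, and "!" closes the call.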
def t_FLOAT(t):
    r"""(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?"""
    t.type = "NUMBER"
    return t
# Note: t_FLOAT above also matches bare integers, so this rule rarely
# fires; both produce NUMBER tokens, so no harm done.
def t_INT(t):
    r"\d+"
    t.type = "NUMBER"
    return t
def t_INVISIBLE(t):
r"INVISIBLE([ ]+(LIST|STRING|BUCKET))?\b"
if "LIST" in t.value:
t.type = "INLINE"
t.value = "[]"
elif "STRING" in t.value:
t.type = "INLINE"
t.value = '""'
elif "BUCKET" in t.value:
t.type = "INLINE"
t.value = "{}"
    else:
        RESERVED(t, "None")   # mutates t in place; the return below sees it
return t
# Not consuming the newline. Needed for "IZ EASTERBUNNY? BTW comment"
def t_COMMENT(t):
r"[ ]*(?:BTW|WTF)[^\n]*"
return t
def t_NAME(t):
r'[a-zA-Z_][a-zA-Z0-9_]*'
if t.value in RESERVED_VALUES:
type, value = RESERVED_VALUES[t.value]
t.type = type
t.value = value
if t.type == "AUTOCALL":
t.lexer.paren_stack.append(")")
return t
def t_WS(t):
r' [ ]+ '
if t.lexer.at_line_start and not t.lexer.paren_stack:
return t
# Don't generate newline tokens when inside of parens
def t_newline(t):
r'\n+'
t.lexer.lineno += len(t.value)
t.type = "NEWLINE"
if not t.lexer.paren_stack:
return t
def t_error(t):
    # To skip bad characters instead of dying, replace the raise with:
    #     print "Skipping", repr(t.value[0])
    #     t.lexer.skip(1)
    raise SyntaxError("Unknown symbol %r" % (t.value[0],))
## I implemented INDENT / DEDENT generation as a post-processing filter
# The original lex token stream contains WS and NEWLINE tokens.
# WS will only occur before any other tokens on a line.
# I have three filters. One tags tokens by adding two attributes.
# "must_indent" is True if the token must be indented from the
# previous code. The other is "at_line_start", which is True for WS
# and for the first non-WS/non-NEWLINE token on a line. It flags the
# check to see if the new line has changed indentation level.
# Python's syntax has three INDENT states
# 0) no colon hence no need to indent
# 1) "if 1: go()" - simple statements have a COLON but no need for an indent
# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent
NO_INDENT = 0
MAY_INDENT = 1
MUST_INDENT = 2
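# The same three states in LOLPython terms (illustrative):
#   0) VISIBLE CHEEZBURGER         -- no "?", nothing may indent
#   1) IZ YEAH? VISIBLE ONE        -- COLON but same line: no indent
#   2) IZ YEAH?
#          VISIBLE ONE             -- COLON NEWLINE: the body must indent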
# only care about whitespace at the start of a line
def track_tokens_filter(lexer, tokens):
lexer.at_line_start = at_line_start = True
indent = NO_INDENT
for token in tokens:
token.at_line_start = at_line_start
if token.type == "COLON":
at_line_start = False
indent = MAY_INDENT
token.must_indent = False
elif token.type == "NEWLINE":
at_line_start = True
if indent == MAY_INDENT:
indent = MUST_INDENT
token.must_indent = False
elif token.type == "WS":
            assert token.at_line_start
at_line_start = True
token.must_indent = False
elif token.type == "COMMENT":
pass
else:
# A real token; only indent after COLON NEWLINE
if indent == MUST_INDENT:
token.must_indent = True
else:
token.must_indent = False
at_line_start = False
indent = NO_INDENT
yield token
lexer.at_line_start = at_line_start
def _new_token(type, lineno):
tok = lex.LexToken()
tok.type = type
tok.value = None
tok.lineno = lineno
tok.lexpos = -1
return tok
# Synthesize a DEDENT tag
def DEDENT(lineno):
return _new_token("DEDENT", lineno)
# Synthesize an INDENT tag
def INDENT(lineno):
return _new_token("INDENT", lineno)
# Track the indentation level and emit the right INDENT / DEDENT events.
def indentation_filter(tokens):
# A stack of indentation levels; will never pop item 0
levels = [0]
token = None
depth = 0
prev_was_ws = False
for token in tokens:
## if 1:
## print "Process", token,
## if token.at_line_start:
## print "at_line_start",
## if token.must_indent:
## print "must_indent",
## print
# WS only occurs at the start of the line
# There may be WS followed by NEWLINE so
# only track the depth here. Don't indent/dedent
# until there's something real.
if token.type == "WS":
assert depth == 0
depth = len(token.value)
prev_was_ws = True
# Don't forward WS to the parser
continue
if token.type == "NEWLINE":
depth = 0
if prev_was_ws or token.at_line_start:
# ignore blank lines
continue
# pass the other cases on through
yield token
continue
if token.type == "COMMENT":
yield token
continue
# then it must be a real token (not WS, not NEWLINE)
# which can affect the indentation level
prev_was_ws = False
if token.must_indent:
# The current depth must be larger than the previous level
if not (depth > levels[-1]):
raise IndentationError("expected an indented block")
levels.append(depth)
yield INDENT(token.lineno)
elif token.at_line_start:
# Must be on the same level or one of the previous levels
if depth == levels[-1]:
# At the same level
pass
elif depth > levels[-1]:
raise IndentationError("indentation increase but not in new block")
else:
# Back up; but only if it matches a previous level
try:
i = levels.index(depth)
except ValueError:
raise IndentationError("inconsistent indentation")
for _ in range(i+1, len(levels)):
yield DEDENT(token.lineno)
levels.pop()
yield token
### Finished processing ###
# Must dedent any remaining levels
if len(levels) > 1:
assert token is not None
for _ in range(1, len(levels)):
yield DEDENT(token.lineno)
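# E.g. (illustrative) the filtered token stream for
#   IZ YEAH?
#       VISIBLE ONE
# is roughly: if True : NEWLINE INDENT print 1 NEWLINE DEDENT -- the WS
# token is swallowed and replaced by the INDENT/DEDENT pair.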
# The top-level filter adds an ENDMARKER, if requested.
# Python's grammar uses it.
def token_filter(lexer, add_endmarker=True):
token = None
tokens = iter(lexer.token, None)
tokens = track_tokens_filter(lexer, tokens)
for token in indentation_filter(tokens):
yield token
if add_endmarker:
lineno = 1
if token is not None:
lineno = token.lineno
yield _new_token("ENDMARKER", lineno)
class LOLLexer(object):
def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
self.lexer = lex.lex(debug=debug, optimize=optimize,
lextab=lextab, reflags=reflags)
self.token_stream = None
def input(self, s, add_endmarker=True):
self.lexer.paren_stack = []
self.lexer.input(s)
self.token_stream = token_filter(self.lexer, add_endmarker)
def token(self):
try:
return self.token_stream.next()
except StopIteration:
return None
# Helper class to generate logically correct indented Python code
class IndentWriter(object):
def __init__(self, outfile):
self.outfile = outfile
self.at_first_column = True
self.indent = 0
def write(self, text):
if self.at_first_column:
self.outfile.write(" "*self.indent)
self.at_first_column = False
self.outfile.write(text)
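# Illustrative use, mirroring what to_python does below: the caller bumps
# .indent on INDENT/DEDENT tokens and sets .at_first_column = True after
# each newline, so that
#     out = IndentWriter(sys.stdout)
#     out.indent = 1
#     out.write("pass\n")
# emits "    pass\n" (four spaces of indent, then the text).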
# Split things up because the from __future__ statements must
# go before any other code.
HEADER = """# LOLPython to Python converter version 1.0
# Written by Andrew Dalke, who should have been working on better things.
"""
BODY = """
# sys is used for COMPLAIN and ARGZ
import sys as _lol_sys
"""
def to_python(s):
L = LOLLexer()
L.input(s)
header = StringIO()
header.write(HEADER)
header_output = IndentWriter(header)
body = StringIO()
body.write(BODY)
body_output = IndentWriter(body)
write = body_output.write
output = body_output
    for t in L.token_stream:
        if t.type == "NAME":
            # Need to escape names which collide with Python keywords.
            # Do that by appending an "_". But then I also need to make
            # sure that "yield_" does not collide with "yield". And you
            # thought you were being clever trying to sneak in a Python
            # keyword. :)
            name = t.value.rstrip("_")
            if name in keyword.kwlist:
                write(t.value + "_ ")
            else:
                write(t.value + " ")
elif t.type in ("RESERVED", "OP", "NUMBER", "CLOSE"):
# While not pretty, I'll put a space after each
# term because it's the simplest solution. Otherwise
# I'll need to track the amount of whitespace between
# the tokens in the original text.
write(t.value+" ")
# XXX escape names which are special in Python!
elif t.type == "STRING":
write(repr(t.value) + " ")
elif t.type == "COMMENT":
# Not enough information to keep comments on the correct
# indentation level. This is good enough. Ugly though.
# Maybe I need to fix the tokenizer.
write("#"+ t.value[3:]+"\n")
output.at_first_column = True
elif t.type == "COLON":
write(":")
elif t.type == "INDENT":
output.indent += 1
pass
elif t.type == "DEDENT":
output.indent -= 1
pass
elif t.type == "NEWLINE":
write(t.value)
output.at_first_column = True
output = body_output
write = output.write
elif t.type == "PRINT":
if t.value == "stdout":
write("print ")
elif t.value == "stderr":
write("print >>_lol_sys.stderr, ")
else:
raise AssertionError(t.value)
elif t.type == "AUTOCALL":
write(t.value + "(")
elif t.type == "INLINE":
write(t.value)
elif t.type == "ENDMARKER":
write("\n# The end.\n")
elif t.type == "WS":
output.leading_ws = t.value
elif t.type == "FUTURE":
# Write to the header. This is a hack. Err, a hairball.
output = header_output
write = output.write
write("from __future__ import ")
else:
raise AssertionError(t.type)
return header.getvalue() + body.getvalue()
# API code for doing the translation and exec'ing the result
def execfile(infile, module_name="__lolmain__"):
"file, module_name -- exec the lolpython file in a newly created module"
if not hasattr(infile, "read"):
s = open(infile).read()
else:
s = infile.read()
return execstring(s, module_name)
def execstring(s, module_name="__lolmain__"):
"s, module_name -- exec the lolpython string in a newly created module"
python_s = to_python(s)
# Doing this bit of trickiness so I can have LOLPython code act
# like __main__. This fix is enough to fool unittest.
m = types.ModuleType(module_name)
sys.modules[module_name] = m
exec python_s in m.__dict__
return m
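# Illustrative API use (assuming this file is importable as "lolpython"):
#     import lolpython
#     m = lolpython.execstring("VISIBLE 'O HAI'", "__lolmain__")
# converts the source to Python and runs it in a fresh module namespace.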
def convert_file(infile, outfile):
"read LOLPython code from infile, write converted Python code to outfile"
if not hasattr(outfile, "write"):
outfile = open(outfile, "w")
outfile.write(to_python(infile.read()))
def convert(filenames):
"convert LOLPython filenames into corresponding Python '.py' files"
if not filenames:
convert_file(sys.stdin, sys.stdout)
else:
for filename in filenames:
base, ext = os.path.splitext(filename)
convert_file(open(filename), open(base+".py", "w"))
def help():
print """convert and run a lolpython program
Commands are:
lolpython Read a lolpython program from stdin and execute it
lolpython --convert Convert a lolpython program from stdin
and generate python to stdout
lolpython --convert filename1 [filename....]
Convert a list of lolpython files into Python files
lolpython filename [arg1 [arg2 ...]]
Run a lolpython program using optional arguments
"""
def main(argv):
if len(argv) >= 2:
if argv[1] == "--convert":
convert(argv[2:])
return
if argv[1] == "--help":
help()
return
if argv[1] == "--version":
print __NAME__ + " " + __VERSION__
return
# otherwise, run the lolpython program
sys.argv = sys.argv[1:]
filename = sys.argv[0]
execfile(filename, "__main__")
else:
# commands from stdin
execfile(sys.stdin)
if __name__ == "__main__":
main(sys.argv)