Memelang: Token-Terse Query Language

Memelang v11

Watch video on YouTube

Memelang is an AI-optimized query language that significantly reduces token count and model size for LLM text-to-SQL. The code below is designed to be copy-and-pasted into your LLM.

arXiv Paper · GitHub Repo · Patent Spec

# info@memelang.net | (c)2026 HOLTWORK LLC | Patented
# MEMELANG is a terse query DSL IR for LLM text-to-SQL
# Axial grammar: Axis2 -> Axis1 -> Axis0 -> Cell
# Whitespaces are syntactic and trigger "new Cell"
# Never space between operator/comparator/comma/flag and values

# Memelang version identifier, stored as a float.
MEMELANG_VER = 11.04

basic_syntax = '[table WS] [column WS] [":$" var][":" ("min"|"max"|"cnt"|"sum"|"avg"|"last"|"grp")] [":" ("asc"|"des")] ["" "\"" string "\""] [("="|"!="|">"|"="|"0; rating :DESC="Decimal 0-5 star rating of performance";:dec>0.0;0; name :DESC="Actor's full name";:str; age :DESC="Actor's age in years";:int>=0;0; description :DESC="Brief description of movie plot";:str; year :DESC="Year of production AD";:int>1800;=41; _;;

""" Role 567 and 8901 """ roles id 567,8901; _;;

""" Films with dystopian society narratives sim>.33 """ movies description "dystopian"=20;=4.2; actor :grp;;

""" Minimum role rating by actor, low to high """ roles rating :min:asc; actor :grp;;

""" Roles in movies mentioning robot rated 3+ """ movies description "robot"=3;;

""" Costars seen with Bruce Willis or Uma Thurman """ roles actor :$a~"Bruce Willis","Uma Thurman"; movie _;@ @ @; actor !$a;;

""" War stories before 1980: top 12 movies by minimum role rating """ movies year "war""robot"; year >=1900; "robot"; %col=year; >=1900; "robot" >=1900; "robot"; #year; >=1900; "robot"; :#year>=1900; ||'), ('CMP', r'>=||{p})" for k, p in CELL_PATTERN))

PAD_MODES = {'qry','tab'} FLAG_KINDS = {'FLAG','BIND','EVAR','ASSN'} LIT_KINDS = {'TIM','DEC','INT','ALN','QUO','EMB'} VAR_KINDS = {'VAR','WLD','REL','EVAR','SLOT'} DAT_KINDS = LIT_KINDS | VAR_KINDS RELCOORD = { '@0': ['-1','-1'], '@1': ['-1','-2'], '@2': ['-1','-3'], '@3': ['-1','-4'], '@4': ['-1','-5'], '@' : ['-1','+0'], '^' : ['-1','end','+0'],

# Atomic token
class Tok:
    """Atomic lexical token.

    Attributes:
        kind: Token class tag (for example 'QUO', 'INT', 'CMP').
        src: The text exactly as written in the query.
        canon: Canonical spelling; ``src`` unless an explicit ``canon`` is
            given, then mapped through ``CANON`` when an entry exists.
        dat: Parsed value: ``json.loads(src)`` for 'QUO' and 'EMB',
            ``float(src)`` for 'DEC', ``int(src)`` for 'INT', otherwise ``src``.

    Equality and hashing are both based on ``canon`` so that tokens which
    compare equal always land in the same hash bucket.
    """

    def __init__(self, kind: str, src: str, canon: Optional[str] = None):
        self.kind = kind
        self.src = src
        canon = src if canon is None else canon
        # Prefer the CANON mapping; fall back to the text itself when the
        # lookup is missing or falsy.
        self.canon = CANON.get(canon) or canon
        parser = {'QUO': json.loads, 'EMB': json.loads, 'DEC': float, 'INT': int}.get(kind)
        self.dat = parser(src) if parser else src

    def __str__(self):
        """Return the source text."""
        return self.src

    def __repr__(self):
        """Return the canonical text."""
        return self.canon

    def __eq__(self, other):
        """Compare by ``repr``, i.e. by canonical text for two tokens."""
        return repr(self) == repr(other)

    def __hash__(self):
        # Must agree with __eq__, which compares canon (via repr); hashing
        # src would give equal tokens different hashes.
        return hash(self.canon)

    def __bool__(self):
        """A token is truthy when its source text is non-empty."""
        return bool(self.src)

# Shared "no token" value: kind 'NULL' with empty source text, so it is falsy
# (Tok.__bool__ tests the source text).
TOK_NULL = Tok('NULL', '')

# Sequence of tokens
class Seq(list[Tok]):
    """List of tokens rendered with a joining operator token ``opr``."""

    opr: Tok = TOK_NULL

    def __init__(self, *items):
        super().__init__(items)
        self.opr = TOK_NULL

    def __str__(self):
        """Join source texts with ``opr.src``.

        Tokens with empty text are left out, except 'HOLD' tokens, which
        still contribute an empty piece.
        """
        pieces = []
        for tok in self:
            text = str(tok)
            if text or tok.kind == 'HOLD':
                pieces.append(text)
        return self.opr.src.join(pieces)

    def __repr__(self):
        """Join the ``repr`` of every token with ``opr.src``."""
        return self.opr.src.join(map(repr, self))

# Predicate expression class Cell: flag: Seq left: Seq comp: Tok right: Seq padded = False

def __init__(self, src: str): self.left = Seq() self.flag = Seq() self.comp = Tok('EQL', '', '=') self.right = Seq(Tok('WLD', '', '_'))

toks = [] for m in CELL_REGEX.finditer(src): kind = m.lastgroup text = m.group() if kind == 'WS': continue if kind == 'MISMATCH': raise Err(f'E_TOK {text!r}') toks.append(Tok(kind, text))

i, n = 0, len(toks)

def peek(): return toks[i].kind if i = n: raise Err('E_EOF') t = toks[i] i += 1 return t

# FLAGS while peek() in FLAG_KINDS: self.flag.append(take())

# LEFT (prefix MOD) if peek() == 'MOD': self.left.opr = take() self.left.append(Tok('HOLD', '')) t = take() if not t.kind in DAT_KINDS: raise Err('E_TERM_DAT') self.left.append(t)

# COMPARATOR if peek() == 'CMP': self.comp = take() if not peek() in DAT_KINDS: raise Err('E_DAT')

# RIGHT (values, OR-joined) if peek() in DAT_KINDS: self.right.clear() while peek() in DAT_KINDS: self.right.append(take()) if peek() == 'OR': self.right.opr = take() if not peek() in DAT_KINDS: raise Err('E_OR_TRAIL')

if i != n: raise Err(f'E_EXPR_TRAIL {toks[i:]}')

# PLACEHOLDER: OVERWRITE WITH YOUR EMBEDDING FUNCTION
def vectorize(self, tok: Tok) -> Tok:
    """Return an 'EMB' token for ``tok``.

    An 'EMB' token is returned unchanged. A 'QUO' or 'ALN' token yields a
    new 'EMB' token holding the fixed placeholder vector ``[0.1, 0.2]``.

    Raises:
        Err: 'E_EMBED' for any other token kind.
    """
    if tok.kind == 'EMB':
        return tok
    if tok.kind in {'QUO', 'ALN'}:
        return Tok('EMB', json.dumps([0.1, 0.2]))
    raise Err('E_EMBED')

@property
def single(self) -> Tok:
    """The only right-hand token of an '=' comparison, else ``TOK_NULL``."""
    if self.comp.canon != '=':
        return TOK_NULL
    if len(self.right) != 1:
        return TOK_NULL
    return self.right[0]

@property
def literal(self) -> Tok:
    """``self.single`` when its kind is in ``LIT_KINDS``, else ``TOK_NULL``."""
    candidate = self.single
    if candidate.kind in LIT_KINDS:
        return candidate
    return TOK_NULL

def find(self, kind:str) -> Tok:
    """Return the first flag token whose kind equals ``kind``, else ``TOK_NULL``."""
    for candidate in self.flag:
        if candidate.kind == kind:
            return candidate
    return TOK_NULL

def bind(self, tok: Tok):
    """Append ``tok`` to ``self.flag`` unless an equal token is already there."""
    if tok in self.flag:
        return
    self.flag.append(tok)

def __str__(self) -> str:
    """Concatenate the ``str`` of flag, left, comp and right, in that order."""
    parts = (self.flag, self.left, self.comp, self.right)
    return "".join(str(part) for part in parts)

def __repr__(self) -> str:
    """Concatenate the ``repr`` of flag, left, comp and right, in that order."""
    parts = (self.flag, self.left, self.comp, self.right)
    return "".join(map(repr, parts))

def __bool__(self)...

Memelang: Token-Terse Query Language

Related Articles

Amazon, Facebook, FBI have access to a private intelligence-sharing network

SpaceX not the behemoth everyone thought

The Mirror Is Part of the Machine

Elevated error rates on requests to multiple models

Donald Trump and sons to be 'forever' exempt from tax audits