I'm using ANTLR4 to generate a Python target from the Python3 .g4 grammar file in the ANTLR grammar repository. The generated Python3Lexer.py file contains Java code that I need to translate into Python. Here are the two Java segments it emits; you can also find them in the Python3 grammar file:
// A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>();

// The stack that keeps track of the indentation level.
private java.util.Stack<Integer> indents = new java.util.Stack<>();

// The amount of opened braces, brackets and parenthesis.
private int opened = 0;

// The most recently produced token.
private Token lastToken = null;

@Override
public void emit(Token t) {
    super.setToken(t);
    tokens.offer(t);
}

@Override
public Token nextToken() {
    // Check if the end-of-file is ahead and there are still some DEDENTS expected.
    if (_input.LA(1) == EOF && !this.indents.isEmpty()) {
        // Remove any trailing EOF tokens from our buffer.
        for (int i = tokens.size() - 1; i >= 0; i--) {
            if (tokens.get(i).getType() == EOF) {
                tokens.remove(i);
            }
        }

        // First emit an extra line break that serves as the end of the statement.
        this.emit(commonToken(Python3Parser.NEWLINE, "\n"));

        // Now emit as much DEDENT tokens as needed.
        while (!indents.isEmpty()) {
            this.emit(createDedent());
            indents.pop();
        }

        // Put the EOF back on the token stream.
        this.emit(commonToken(Python3Parser.EOF, "<EOF>"));
    }

    Token next = super.nextToken();

    if (next.getChannel() == Token.DEFAULT_CHANNEL) {
        // Keep track of the last token on the default channel.
        this.lastToken = next;
    }

    return tokens.isEmpty() ? next : tokens.poll();
}

private Token createDedent() {
    CommonToken dedent = commonToken(Python3Parser.DEDENT, "");
    dedent.setLine(this.lastToken.getLine());
    return dedent;
}

private CommonToken commonToken(int type, String text) {
    int stop = this.getCharIndex() - 1;
    int start = text.isEmpty() ? stop : stop - text.length() + 1;
    return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
}

static int getIndentationCount(String spaces) {
    int count = 0;
    for (char ch : spaces.toCharArray()) {
        switch (ch) {
            case '\t':
                count += 8 - (count % 8);
                break;
            default:
                // A normal space char.
                count++;
        }
    }
    return count;
}

boolean atStartOfInput() {
    return super.getCharPositionInLine() == 0 && super.getLine() == 1;
}
and
String newLine = getText().replaceAll("[^\r\n\f]+", "");
String spaces = getText().replaceAll("[\r\n\f]+", "");
int next = _input.LA(1);

if (opened > 0 || next == '\r' || next == '\n' || next == '\f' || next == '#') {
    // If we're inside a list or on a blank line, ignore all indents,
    // dedents and line breaks.
    skip();
}
else {
    emit(commonToken(NEWLINE, newLine));

    int indent = getIndentationCount(spaces);
    int previous = indents.isEmpty() ? 0 : indents.peek();

    if (indent == previous) {
        // skip indents of the same size as the present indent-size
        skip();
    }
    else if (indent > previous) {
        indents.push(indent);
        emit(commonToken(Python3Parser.INDENT, spaces));
    }
    else {
        // Possibly emit more than 1 DEDENT token.
        while (!indents.isEmpty() && indents.peek() > indent) {
            this.emit(createDedent());
            indents.pop();
        }
    }
}
I translated these into:
# A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
tokens = deque()

# The stack that keeps track of the indentation level.
# https://docs.python.org/3/tutorial/datastructures.html#using-lists-as-stacks
indents = []

# The amount of opened braces, brackets and parenthesis.
opened = 0

# The most recently produced token.
lastToken = None

def emit(self, t):
    self._token = t
    self.tokens.append(t)

def nextToken(self):
    # Check if the end-of-file is ahead and there are still some DEDENTS expected.
    if self._input.LA(1) == Token.EOF and self.indents.size() != 0:
        # Remove any trailing EOF tokens from our buffer.
        for i in range(tokens.size() - 1, 0, -1):
            if self.tokens[i].getType() == Token.EOF:
                self.tokens.remove(i)

        # First emit an extra line break that serves as the end of the statement.
        self.emit(commonToken(Python3Parser.NEWLINE, "\n"))

        # Now emit as much DEDENT tokens as needed.
        while self.indents.size() != 0:
            self.emit(createDedent())
            self.indents.pop()

        # Put the EOF back on the token stream.
        self.emit(commonToken(Python3Parser.EOF, "<EOF>"))

    next = self.nextToken()

    if next.getChannel() == Token.DEFAULT_CHANNEL:
        # Keep track of the last token on the default channel.
        self.lastToken = next

    return next if self.tokens.size() == 0 else self.tokens.popleft()

def createDedent():
    dedent = commonToken(Python3Parser.DEDENT, "")
    dedent.setLine(self.lastToken.getLine())
    return dedent

def commonToken(self, type, text):
    stop = self.getCharIndex() - 1
    start = stop if text.size() == 0 else stop - text.size() + 1
    return CommonToken(self._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop)

def getIndentationCount(spaces):
    count = 0
    for ch in spaces:
        if ch == '\t':
            count += 8 - (count % 8)
            break
        else:
            # A normal space char.
            count = count + 1
    return count

def atStartOfInput(self):
    return self.getCharPositionInLine() == 0 and self.getLine() == 1
and
newLine = getText().replaceAll("[^\r\n\f]+", "")
spaces = getText().replaceAll("[\r\n\f]+", "")
next = self._input.LA(1)
if opened > 0 or next == '\r' or next == '\n' or next == '\f' or next == '#':
    # If we're inside a list or on a blank line, ignore all indents,
    # dedents and line breaks.
    skip()
else:
    emit(commonToken(NEWLINE, newLine))
    indent = getIndentationCount(spaces)
    previous = 0 if indents.isEmpty() else indents.peek()
    if indent == previous:
        # skip indents of the same size as the present indent-size
        skip()
    elif indent > previous:
        indents.push(indent)
        emit(commonToken(Python3Parser.INDENT, spaces))
    else:
        # Possibly emit more than 1 DEDENT token.
        while not indents.isEmpty() and indents.peek() > indent:
            self.emit(createDedent())
            indents.pop()
Here is my Python script that runs the ANTLR output (with the Python snippets in place of the Java ones). I run it with the command: python main.py test.py
import sys
from antlr4 import *
from Python3Lexer import Python3Lexer
from Python3Parser import Python3Parser
from Python3Listener import Python3Listener

class FuncPrinter(Python3Listener):
    def enterFuncdef(self, ctx):
        print("Oh, a func")

def main(argv):
    input = FileStream(argv[1])
    lexer = Python3Lexer(input)
    stream = CommonTokenStream(lexer)
    parser = Python3Parser(stream)
    tree = parser.funcdef()
    printer = KeyPrinter()
    walker = ParseTreeWalker()
    walker.walk(printer, tree)

if __name__ == '__main__':
    main(sys.argv)
It errors out and prints the following traceback:
Traceback (most recent call last):
  File "main.py", line 24, in <module>
    main(sys.argv)
  File "main.py", line 17, in main
    tree = parser.parameters()
  File "...\antler-test\Python3Parser.py", line 1297, in parameters
    self.enterRule(localctx, 14, self.RULE_parameters)
  File "...\antler-test\antlr4\Parser.py", line 358, in enterRule
    self._ctx.start = self._input.LT(1)
  File "...\antler-test\antlr4\CommonTokenStream.py", line 61, in LT
    self.lazyInit()
  File "...\antler-test\antlr4\BufferedTokenStream.py", line 186, in lazyInit
    self.setup()
  File "...\antler-test\antlr4\BufferedTokenStream.py", line 189, in setup
    self.sync(0)
  File "...\antler-test\antlr4\BufferedTokenStream.py", line 111, in sync
    fetched = self.fetch(n)
  File "...\antler-test\antlr4\BufferedTokenStream.py", line 123, in fetch
    t = self.tokenSource.nextToken()
  File "...\antler-test\Python3Lexer.py", line 698, in nextToken
    next = self.nextToken()
  File "...\antler-test\Python3Lexer.py", line 698, in nextToken
    next = self.nextToken()
  File "...\antler-test\Python3Lexer.py", line 698, in nextToken
    next = self.nextToken()
  [Previous line repeated 985 more times]
  File "...\antler-test\Python3Lexer.py", line 680, in nextToken
    if self._input.LA(1) == Token.EOF and self.indents.size() != 0:
  File "...\antler-test\antlr4\InputStream.py", line 49, in LA
    if offset==0:
RecursionError: maximum recursion depth exceeded in comparison
The input file looks like this:
def fun1():
    return None

def fun2():
    return None
I'm not sure whether I translated the code into incorrect Python, or whether the recursion is simply too deep for Python. I also don't see how to rewrite the nextToken method iteratively, since it isn't tail-recursive. Maybe someone can figure it out? Or is there some other problem with what I'm doing?
I had the same problem. I couldn't get Alexandre's code to work under Python 3 and had to modify it slightly:
...
next = self._input.LA(1)
if next == Python3Parser.EOF:
    chr_next = -1
else:
    chr_next = chr( next )
if self.opened > 0 or chr_next == '\r' or chr_next == '\n' or chr_next == '\f' or chr_next == '#':
    self.skip()
...
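The reason the chr() conversion is needed is that in the ANTLR Python runtime, _input.LA(1) returns an integer code point (or Token.EOF, i.e. -1, at end of input) rather than a one-character string, so the Java-style comparison against '\r' never matches. As a sketch only, the same guard can also be written against the raw code points instead of converting with chr(); this is a fragment meant to sit inside the NEWLINE action, not standalone code:

next = self._input.LA(1)
# LA(1) yields an int, so compare against ord(...) values;
# Token.EOF (-1) matches none of these and is handled normally.
if self.opened > 0 or next in (ord('\r'), ord('\n'), ord('\f'), ord('#')):
    self.skip()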
You can also move all the imports into the lexer's header:
@lexer::header {
import re
from Python3Parser import Python3Parser
from antlr4.Token import CommonToken
}
Your Python code says:
next = self.nextToken()
but your Java code says:
Token next = super.nextToken();
Note that super is not the same as self. You probably meant:
next = super().nextToken()
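That one change is what breaks the infinite recursion in the traceback: the override has to delegate to the base Lexer class, whereas self.nextToken() re-enters the override itself. A sketch of how the tail of the translated nextToken could then look (assuming tokens is the collections.deque from the translation above, and noting that the Python runtime exposes the channel as an attribute rather than a getChannel() method); this is a fragment, not the full method:

# Delegate to the base class lexer; self.nextToken() would recurse forever.
next = super().nextToken()
if next.channel == Token.DEFAULT_CHANNEL:
    # Keep track of the last token on the default channel.
    self.lastToken = next
return next if len(self.tokens) == 0 else self.tokens.popleft()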
I've been working on this same topic for a few days.
It isn't easy: the Python runtime's API is not exactly the same as the Java runtime's, and the Python runtime is less widely used and quite incomplete.
I had to resort to a few workarounds, but it seems to work. Here is my code:
tokens { INDENT, DEDENT }

@lexer::members {

# A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
self.tokens = []

# The stack that keeps track of the indentation level.
self.indents = []

# The amount of opened braces, brackets and parenthesis.
self.opened = 0

# The most recently produced token.
self.last_token = None

def emitToken(self, t):
    super().emitToken(t)
    self.tokens.append(t)

def nextToken(self):
    if self._input.LA(1) == Token.EOF and len(self.indents) > 0:
        # Remove any trailing EOF tokens from our buffer.
        while len(self.tokens) > 0 and self.tokens[-1].type == Token.EOF:
            del self.tokens[-1]

        # First emit an extra line break that serves as the end of the statement.
        self.emitToken(self.common_token(Python3Lexer.NEWLINE, "\n"));

        # Now emit as much DEDENT tokens as needed.
        while len(self.indents) != 0:
            self.emitToken(self.create_dedent())
            del self.indents[-1]

        # Put the EOF back on the token stream.
        self.emitToken(self.common_token(Token.EOF, "<EOF>"));

    next = super().nextToken();

    if next.channel == Token.DEFAULT_CHANNEL:
        # Keep track of the last token on the default channel.
        self.last_token = next

    if len(self.tokens) == 0:
        return next
    else:
        t = self.tokens[0]
        del self.tokens[0]
        return t

def create_dedent(self):
    from Python3Parser import Python3Parser
    dedent = self.common_token(Python3Parser.DEDENT, "")
    dedent.line = self.last_token.line
    return dedent

def common_token(self, _type, text):
    from antlr4.Token import CommonToken
    stop = self.getCharIndex() - 1
    if len(self.text) == 0:
        start = stop
    else:
        start = stop - len(self.text) + 1
    return CommonToken(self._tokenFactorySourcePair, _type, Lexer.DEFAULT_TOKEN_CHANNEL, start, stop)

## Calculates the indentation of the provided spaces, taking the
## following rules into account:
##
## "Tabs are replaced (from left to right) by one to eight spaces
## such that the total number of characters up to and including
## the replacement is a multiple of eight [...]"
##
## -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
def getIndentationCount(self, spaces):
    count = 0
    for ch in spaces:
        if ch == '\t':
            count += 8 - (count % 8)
        else:
            count += 1
    return count

def atStartOfInput(self):
    return self._interp.column == 0 and self._interp.line == 1
}
And for the NEWLINE lexer rule:
NEWLINE
 : ( {self.atStartOfInput()}? SPACES
   | ( '\r'? '\n' | '\r' | '\f' ) SPACES?
   )
   {
import re
from Python3Parser import Python3Parser
new_line = re.sub(r"[^\r\n\f]+", "", self._interp.getText(self._input)) #.replaceAll("[^\r\n\f]+", "")
spaces = re.sub(r"[\r\n\f]+", "", self._interp.getText(self._input)) #.replaceAll("[\r\n\f]+", "")
next = self._input.LA(1)
if self.opened > 0 or next == '\r' or next == '\n' or next == '\f' or next == '#':
    self.skip()
else:
    self.emitToken(self.common_token(self.NEWLINE, new_line))
    indent = self.getIndentationCount(spaces)
    if len(self.indents) == 0:
        previous = 0
    else:
        previous = self.indents[-1]
    if indent == previous:
        self.skip()
    elif indent > previous:
        self.indents.append(indent)
        self.emitToken(self.common_token(Python3Parser.INDENT, spaces))
    else:
        while len(self.indents) > 0 and self.indents[-1] > indent:
            self.emitToken(self.create_dedent())
            del self.indents[-1]
   };
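To sanity-check the embedded actions it can be easier to dump the token stream directly than to go through the parser. A minimal sketch of such a driver (assuming the grammar has been regenerated for the Python 3 target so that Python3Lexer.py is importable; the script and input file names are just examples):

import sys
from antlr4 import FileStream, CommonTokenStream
from Python3Lexer import Python3Lexer

def dump_tokens(path):
    # Tokenize the file and print every token, including the synthetic
    # NEWLINE/INDENT/DEDENT tokens produced by the embedded actions.
    lexer = Python3Lexer(FileStream(path))
    stream = CommonTokenStream(lexer)
    stream.fill()
    for tok in stream.tokens:
        print(tok)

if __name__ == '__main__':
    dump_tokens(sys.argv[1])

Run over the two-function input file from the question, this should list DEDENT tokens before EOF if the overrides are wired up correctly.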
You also have to rename the lexer id "str" to something else (for example "string") throughout the file, because str is the name of a Python built-in type.