From: Thomas Walker Lynch Date: Thu, 5 Sep 2024 07:37:08 +0000 (+0000) Subject: adds build support for split lexer parser ANTLR grammar. Adds program RuleNameList... X-Git-Url: https://git.reasoningtechnology.com/usr/lib/python2.7/sre_compile.py?a=commitdiff_plain;h=d081f46424d143e272d6ca66fd1374613a25a62b;p=GQL-to-Cypher adds build support for split lexer parser ANTLR grammar. Adds program RuleNameList that reports from the parse. --- diff --git a/README.md b/README.md index 59bdd0b..6d0d30a 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,26 @@ # Project management -Please read documents in the ./lector directory for information -about managing this project and using the tools, if any. +As you are reading this you might be a GQL_to_Cypher-ologist. Please read +documents in the ./ologist directory for project information, and the +documents in the developer/ologist directory for documents on building +the project. -For developers, get started by typing: +The project top level is for project management and imported tools. Developers +do development work in the `developer` directory. + +For developers, from the top of the project get started by typing: ``` -> . developer_init +> . executor/env_dev ``` -That is similar to the `activate` of Python. +This will set up the environment and `cd` to the developer directory. (`env_dev` +is analogous to the Python virtual environment `activate`.) # About -This is a transpiler from GQL to Cypher. - -The `syntax_recognizer` accepts a GQL query and then outputs a -the syntax found as an XML file. - -# State of development +This is a project to develop a transpiler from GQL to Cypher. -Working on the `syntax_recognizer`. - diff --git a/developer/ANTLR/LexerAdaptor.java b/developer/ANTLR/LexerAdaptor.java new file mode 100644 index 0000000..338000e --- /dev/null +++ b/developer/ANTLR/LexerAdaptor.java @@ -0,0 +1,151 @@ +/* + [The "BSD licence"] + Copyright (c) 2005-2007 Terence Parr + All rights reserved.
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +//package org.antlr.parser.antlr4; + +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.Lexer; +import org.antlr.v4.runtime.Token; +import org.antlr.v4.runtime.misc.Interval; + +/** + * Abstract {@link Lexer} base that tracks which kind of grammar construct is currently being lexed and, in + * {@link #emit()}, adjusts the type of ID tokens to match. + * NOTE(review): presumably the generated ANTLRv4Lexer names this class as its superClass - confirm in the lexer grammar. + */ +public abstract class LexerAdaptor extends Lexer { + + /** + * Generic type for OPTIONS, TOKENS and CHANNELS + */ + private static final int PREQUEL_CONSTRUCT = -10; + /** + * Rule type used by emit() while inside an OPTIONS block that was opened when the current rule type was TOKEN_REF. + */ + private static final int OPTIONS_CONSTRUCT = -11; + + /** + * Creates the lexer over the given character stream by forwarding it to the Lexer constructor. + * + * @param input the character stream passed to super + */ + public LexerAdaptor(CharStream input) { + super(input); + } + + /** + * Track whether we are inside of a rule and whether it is lexical parser.
_currentRuleType==Token.INVALID_TYPE + * means that we are outside of a rule. At the first sign of a rule name reference and _currentRuleType==invalid, we + * can assume that we are starting a parser rule. Similarly, seeing a token reference when not already in rule means + * starting a token rule. The terminating ';' of a rule, flips this back to invalid type. + * + * This is not perfect logic but works. For example, "grammar T;" means that we start and stop a lexical rule for + * the "T;". Dangerous but works. + * + * The whole point of this state information is to distinguish between [..arg actions..] and [charsets]. Char sets + * can only occur in lexical rules and arg actions cannot occur. + */ + private int _currentRuleType = Token.INVALID_TYPE; + + /** + * NOTE(review): this flag is only ever assigned (here and in reset()); nothing in this class reads it. + */ + private boolean insideOptionsBlock = false; + + /** + * @return the current rule type as last stored by emit(), reset() or setCurrentRuleType() + */ + public int getCurrentRuleType() { + return _currentRuleType; + } + + /** + * Overwrites the tracked rule type. + * + * @param ruleType the value to store; this class itself stores Token.INVALID_TYPE, token types, or its private construct markers + */ + public void setCurrentRuleType(int ruleType) { + this._currentRuleType = ruleType; + } + + /** + * Chooses the mode to enter at the start of a bracketed construct: inside a lexer rule it pushes the LexerCharSet + * mode and calls more(); otherwise it pushes the Argument mode. + */ + protected void handleBeginArgument() { + if (inLexerRule()) { + pushMode(ANTLRv4Lexer.LexerCharSet); + more(); + } else { + pushMode(ANTLRv4Lexer.Argument); + } + } + + /** + * Pops the current mode and, if the mode stack is still non-empty afterwards, sets the token type to + * ARGUMENT_CONTENT. + */ + protected void handleEndArgument() { + popMode(); + if (_modeStack.size() > 0) { + setType(ANTLRv4Lexer.ARGUMENT_CONTENT); + } + } + + /** + * Records the mode before popping and the value returned by popMode(). When the mode stack is still non-empty and + * both values equal TargetLanguageAction, the token type is set to ACTION_CONTENT (the local name + * isActionWithinAction marks this as the close of a nested action). + */ + protected void handleEndAction() { + int oldMode = _mode; + int newMode = popMode(); + boolean isActionWithinAction = _modeStack.size() > 0 + && newMode == ANTLRv4Lexer.TargetLanguageAction + && oldMode == newMode; + + if (isActionWithinAction) { + setType(ANTLRv4Lexer.ACTION_CONTENT); + } + } + + /** + * Updates the tracked rule type from the type of the token about to be emitted, retypes ID tokens as TOKEN_REF or + * RULE_REF depending on whether their first character is upper case, and then delegates to super.emit(). + * The checks are order-dependent: a SEMI seen while in the options construct is matched (and deliberately ignored) + * before the generic SEMI check that ends a rule definition. + * + * @return the token produced by super.emit() + */ + @Override + public Token emit() { + if ((_type == ANTLRv4Lexer.OPTIONS || _type == ANTLRv4Lexer.TOKENS || _type == ANTLRv4Lexer.CHANNELS) + && getCurrentRuleType() == Token.INVALID_TYPE) { // enter prequel construct ending with an RBRACE + setCurrentRuleType(PREQUEL_CONSTRUCT); + } else if (_type == ANTLRv4Lexer.OPTIONS && getCurrentRuleType() == ANTLRv4Lexer.TOKEN_REF) + { + setCurrentRuleType(OPTIONS_CONSTRUCT); + }
else if (_type == ANTLRv4Lexer.RBRACE && getCurrentRuleType() == PREQUEL_CONSTRUCT) { // exit prequel construct + setCurrentRuleType(Token.INVALID_TYPE); + } else if (_type == ANTLRv4Lexer.RBRACE && getCurrentRuleType() == OPTIONS_CONSTRUCT) + { // exit options + setCurrentRuleType(ANTLRv4Lexer.TOKEN_REF); + } else if (_type == ANTLRv4Lexer.AT && getCurrentRuleType() == Token.INVALID_TYPE) { // enter action + setCurrentRuleType(ANTLRv4Lexer.AT); + } else if (_type == ANTLRv4Lexer.SEMI && getCurrentRuleType() == OPTIONS_CONSTRUCT) + { // ';' in options { .... }. Don't change anything. + } else if (_type == ANTLRv4Lexer.END_ACTION && getCurrentRuleType() == ANTLRv4Lexer.AT) { // exit action + setCurrentRuleType(Token.INVALID_TYPE); + } else if (_type == ANTLRv4Lexer.ID) { + String firstChar = _input.getText(Interval.of(_tokenStartCharIndex, _tokenStartCharIndex)); + if (Character.isUpperCase(firstChar.charAt(0))) { + _type = ANTLRv4Lexer.TOKEN_REF; + } else { + _type = ANTLRv4Lexer.RULE_REF; + } + + if (getCurrentRuleType() == Token.INVALID_TYPE) { // if outside of rule def + setCurrentRuleType(_type); // set to inside lexer or parser rule + } + } else if (_type == ANTLRv4Lexer.SEMI) { // exit rule def + setCurrentRuleType(Token.INVALID_TYPE); + } + + return super.emit(); + } + + /** + * @return true when the tracked rule type is TOKEN_REF, i.e. emit() last classified the enclosing rule as a lexer rule + */ + private boolean inLexerRule() { + return getCurrentRuleType() == ANTLRv4Lexer.TOKEN_REF; + } + + /** + * @return true when the tracked rule type is RULE_REF; counterpart of inLexerRule() with no caller in this class + */ + @SuppressWarnings("unused") + private boolean inParserRule() { // not used, but added for clarity + return getCurrentRuleType() == ANTLRv4Lexer.RULE_REF; + } + + /** + * Restores the tracking state to its initial values (rule type INVALID_TYPE, insideOptionsBlock false) and then + * delegates to super.reset(). + */ + @Override + public void reset() { + setCurrentRuleType(Token.INVALID_TYPE); + insideOptionsBlock = false; + super.reset(); + } +} diff --git a/developer/ANTLR/grammar_rules.g4 b/developer/ANTLR/grammar_rules.g4 deleted file mode 100644 index d02b7ab..0000000 --- a/developer/ANTLR/grammar_rules.g4 +++ /dev/null @@ -1,3330 +0,0 @@ -grammar GQL_20240412; - -options { caseInsensitive = true; } - -// 6 - -gqlProgram -: programActivity
sessionCloseCommand? EOF -| sessionCloseCommand EOF -; - -programActivity -: sessionActivity -| transactionActivity -; - -sessionActivity -: sessionResetCommand+ -| sessionSetCommand+ sessionResetCommand* -; - -transactionActivity -: startTransactionCommand (procedureSpecification endTransactionCommand?)? -| procedureSpecification endTransactionCommand? -| endTransactionCommand -; - -endTransactionCommand -: rollbackCommand -| commitCommand -; - -// 7.1 - -sessionSetCommand -: SESSION SET (sessionSetSchemaClause | sessionSetGraphClause | sessionSetTimeZoneClause | sessionSetParameterClause) -; - -sessionSetSchemaClause -: SCHEMA schemaReference -; - -sessionSetGraphClause -: PROPERTY? GRAPH graphExpression -; - -sessionSetTimeZoneClause -: TIME ZONE setTimeZoneValue -; - -setTimeZoneValue -: timeZoneString -; - -sessionSetParameterClause -: sessionSetGraphParameterClause -| sessionSetBindingTableParameterClause -| sessionSetValueParameterClause -; - -sessionSetGraphParameterClause -: PROPERTY? GRAPH sessionSetParameterName optTypedGraphInitializer -; - -sessionSetBindingTableParameterClause -: BINDING? TABLE sessionSetParameterName optTypedBindingTableInitializer -; - -sessionSetValueParameterClause -: VALUE sessionSetParameterName optTypedValueInitializer -; - -sessionSetParameterName -: (IF NOT EXISTS)? sessionParameterSpecification -; - -// 7.2 - -sessionResetCommand -: SESSION RESET sessionResetArguments? -; - -sessionResetArguments -: ALL? (PARAMETERS | CHARACTERISTICS) -| SCHEMA -| PROPERTY? GRAPH -| TIME ZONE -| PARAMETER? sessionParameterSpecification -; - -// 7.3 - -sessionCloseCommand -: SESSION CLOSE -; - -// 7.4 - -sessionParameterSpecification -: GENERAL_PARAMETER_REFERENCE -; - -// 8.1 - -startTransactionCommand -: START TRANSACTION transactionCharacteristics? 
-; - -// 8.2 - -transactionCharacteristics -: transactionMode (COMMA transactionMode)* -; - -transactionMode -: transactionAccessMode -; - -transactionAccessMode -: READ ONLY -| READ WRITE -; - -// 8.3 - -rollbackCommand -: ROLLBACK -; - -// 8.4 - -commitCommand -: COMMIT -; - -// 9.1 - -nestedProcedureSpecification -: LEFT_BRACE procedureSpecification RIGHT_BRACE -; - -// catalogModifyingProcedureSpecification, dataModifyingProcedureSpecification and querySpecification are -// identical productions. The specification distinguishes them in the BNF, but in the implementation, the distinction -// has to be made semantically, in code, based on the kind of statements contained in the procedureSpecification. -procedureSpecification -: procedureBody -// : catalogModifyingProcedureSpecification -// | dataModifyingProcedureSpecification -// | querySpecification -; - -//catalogModifyingProcedureSpecification -// : procedureBody -// ; - -nestedDataModifyingProcedureSpecification -: LEFT_BRACE procedureBody RIGHT_BRACE -; - -//dataModifyingProcedureSpecification -// : procedureBody -// ; - -nestedQuerySpecification -: LEFT_BRACE procedureBody RIGHT_BRACE -; - -//querySpecification -// : procedureBody -// ; - -// 9.2 - -procedureBody -: atSchemaClause? bindingVariableDefinitionBlock? statementBlock -; - -bindingVariableDefinitionBlock -: bindingVariableDefinition+ -; - -bindingVariableDefinition -: graphVariableDefinition -| bindingTableVariableDefinition -| valueVariableDefinition -; - -statementBlock -: statement nextStatement* -; - -statement -: linearCatalogModifyingStatement -| linearDataModifyingStatement -| compositeQueryStatement -; - -nextStatement -: NEXT yieldClause? statement -; - -// 10.1 - -graphVariableDefinition -: PROPERTY? GRAPH bindingVariable optTypedGraphInitializer -; - -optTypedGraphInitializer -: (typed? graphReferenceValueType)? graphInitializer -; - -graphInitializer -: EQUALS_OPERATOR graphExpression -; - -// 10.2 - -bindingTableVariableDefinition -: BINDING? TABLE bindingVariable optTypedBindingTableInitializer -; - -optTypedBindingTableInitializer -: (typed?
bindingTableReferenceValueType)? bindingTableInitializer -; - -bindingTableInitializer -: EQUALS_OPERATOR bindingTableExpression -; - -// 10.3 - -valueVariableDefinition -: VALUE bindingVariable optTypedValueInitializer -; - -optTypedValueInitializer -: (typed? valueType)? valueInitializer -; - -valueInitializer -: EQUALS_OPERATOR valueExpression -; - -// 11.1 - -graphExpression -: objectExpressionPrimary -| graphReference -| objectNameOrBindingVariable -| currentGraph -; - -currentGraph -: CURRENT_PROPERTY_GRAPH -| CURRENT_GRAPH -; - -// 11.2 - -bindingTableExpression -: nestedBindingTableQuerySpecification -| objectExpressionPrimary -| bindingTableReference -| objectNameOrBindingVariable -; - -nestedBindingTableQuerySpecification -: nestedQuerySpecification -; - -// 11.3 - -objectExpressionPrimary -: VARIABLE valueExpressionPrimary -| parenthesizedValueExpression -| nonParenthesizedValueExpressionPrimarySpecialCase -; - -// 12.1 - -linearCatalogModifyingStatement -: simpleCatalogModifyingStatement+ -; - -simpleCatalogModifyingStatement -: primitiveCatalogModifyingStatement -| callCatalogModifyingProcedureStatement -; - -primitiveCatalogModifyingStatement -: createSchemaStatement -| dropSchemaStatement -| createGraphStatement -| dropGraphStatement -| createGraphTypeStatement -| dropGraphTypeStatement -; - -// 12.2 - -createSchemaStatement -: CREATE SCHEMA (IF NOT EXISTS)? catalogSchemaParentAndName -; - -// 12.3 - -dropSchemaStatement -: DROP SCHEMA (IF EXISTS)? catalogSchemaParentAndName -; - -// 12.4 - -createGraphStatement -: CREATE (PROPERTY? GRAPH (IF NOT EXISTS)? | OR REPLACE PROPERTY? GRAPH) catalogGraphParentAndName (openGraphType | ofGraphType) graphSource? -; - -openGraphType -: typed? ANY (PROPERTY? GRAPH)? -; - -ofGraphType -: graphTypeLikeGraph -| typed? graphTypeReference -| typed? (PROPERTY? GRAPH)? 
nestedGraphTypeSpecification -; - -graphTypeLikeGraph -: LIKE graphExpression -; - -graphSource -: AS COPY OF graphExpression -; - -// 12.5 - -dropGraphStatement -: DROP PROPERTY? GRAPH (IF EXISTS)? catalogGraphParentAndName -; - -// 12.6 - -createGraphTypeStatement -: CREATE (PROPERTY? GRAPH TYPE (IF NOT EXISTS)? | OR REPLACE PROPERTY? GRAPH TYPE) catalogGraphTypeParentAndName graphTypeSource -; - -graphTypeSource -: AS? copyOfGraphType -| graphTypeLikeGraph -| AS? nestedGraphTypeSpecification -; - -copyOfGraphType -: COPY OF graphTypeReference -; - -// 12.7 - -dropGraphTypeStatement -: DROP PROPERTY? GRAPH TYPE (IF EXISTS)? catalogGraphTypeParentAndName -; - -// 12.8 - -callCatalogModifyingProcedureStatement -: callProcedureStatement -; - -// 13.1 - -linearDataModifyingStatement -: focusedLinearDataModifyingStatement -| ambientLinearDataModifyingStatement -; - -focusedLinearDataModifyingStatement -: focusedLinearDataModifyingStatementBody -| focusedNestedDataModifyingProcedureSpecification -; - -focusedLinearDataModifyingStatementBody -: useGraphClause simpleLinearDataAccessingStatement primitiveResultStatement? -; - -focusedNestedDataModifyingProcedureSpecification -: useGraphClause nestedDataModifyingProcedureSpecification -; - -ambientLinearDataModifyingStatement -: ambientLinearDataModifyingStatementBody -| nestedDataModifyingProcedureSpecification -; - -ambientLinearDataModifyingStatementBody -: simpleLinearDataAccessingStatement primitiveResultStatement? 
-; - -simpleLinearDataAccessingStatement -: simpleQueryStatement* simpleDataModifyingStatement+ -; - -// Subsumed by previous rule to enforce 13.1 SR 5 -//simpleDataAccessingStatement -// : simpleQueryStatement -// | simpleDataModifyingStatement -// ; - -simpleDataModifyingStatement -: primitiveDataModifyingStatement -| callDataModifyingProcedureStatement -; - -primitiveDataModifyingStatement -: insertStatement -| setStatement -| removeStatement -| deleteStatement -; - -// 13.2 - -insertStatement -: INSERT insertGraphPattern -; - -// 13.3 - -setStatement -: SET setItemList -; - -setItemList -: setItem (COMMA setItem)* -; - -setItem -: setPropertyItem -| setAllPropertiesItem -| setLabelItem -; - -setPropertyItem -: bindingVariableReference PERIOD propertyName EQUALS_OPERATOR valueExpression -; - -setAllPropertiesItem -: bindingVariableReference EQUALS_OPERATOR LEFT_BRACE propertyKeyValuePairList? RIGHT_BRACE -; - -setLabelItem -: bindingVariableReference isOrColon labelName -; - -// 13.4 - -removeStatement -: REMOVE removeItemList -; - -removeItemList -: removeItem (COMMA removeItem)* -; - -removeItem -: removePropertyItem -| removeLabelItem -; - -removePropertyItem -: bindingVariableReference PERIOD propertyName -; - -removeLabelItem -: bindingVariableReference isOrColon labelName -; - -// 13.5 - -deleteStatement -: (DETACH | NODETACH)? DELETE deleteItemList -; - -deleteItemList -: deleteItem (COMMA deleteItem)* -; - -deleteItem -: valueExpression -; - -// 13.6 - -callDataModifyingProcedureStatement -: callProcedureStatement -; - -// 14.1 - -compositeQueryStatement -: compositeQueryExpression -; - -// 14.2 - -compositeQueryExpression -: compositeQueryExpression queryConjunction compositeQueryPrimary -| compositeQueryPrimary -; - -queryConjunction -: setOperator -| OTHERWISE -; - -setOperator -: UNION setQuantifier? -| EXCEPT setQuantifier? -| INTERSECT setQuantifier? 
-; - -compositeQueryPrimary -: linearQueryStatement -; - -// 14.3 linearQueryStatement and simpleQueryStatement - -linearQueryStatement -: focusedLinearQueryStatement -| ambientLinearQueryStatement -; - -focusedLinearQueryStatement -: focusedLinearQueryStatementPart* focusedLinearQueryAndPrimitiveResultStatementPart -| focusedPrimitiveResultStatement -| focusedNestedQuerySpecification -| selectStatement -; - -focusedLinearQueryStatementPart -: useGraphClause simpleLinearQueryStatement -; - -focusedLinearQueryAndPrimitiveResultStatementPart -: useGraphClause simpleLinearQueryStatement primitiveResultStatement -; - -focusedPrimitiveResultStatement -: useGraphClause primitiveResultStatement -; - -focusedNestedQuerySpecification -: useGraphClause nestedQuerySpecification -; - -ambientLinearQueryStatement -: simpleLinearQueryStatement? primitiveResultStatement -| nestedQuerySpecification -; - -simpleLinearQueryStatement -: simpleQueryStatement+ -; - -simpleQueryStatement -: primitiveQueryStatement -| callQueryStatement -; - -primitiveQueryStatement -: matchStatement -| letStatement -| forStatement -| filterStatement -| orderByAndPageStatement -; - -// 14.4 - -matchStatement -: simpleMatchStatement -| optionalMatchStatement -; - -simpleMatchStatement -: MATCH graphPatternBindingTable -; - -optionalMatchStatement -: OPTIONAL optionalOperand -; - -optionalOperand -: simpleMatchStatement -| LEFT_BRACE matchStatementBlock RIGHT_BRACE -| LEFT_PAREN matchStatementBlock RIGHT_PAREN -; - -matchStatementBlock -: matchStatement+ -; - -// 14.5 - -callQueryStatement -: callProcedureStatement -; - -// 14.6 - -filterStatement -: FILTER (whereClause | searchCondition) -; - -// 14.7 - -letStatement -: LET letVariableDefinitionList -; - -letVariableDefinitionList -: letVariableDefinition (COMMA letVariableDefinition)* -; - -letVariableDefinition -: valueVariableDefinition -| bindingVariable EQUALS_OPERATOR valueExpression -; - -// 14.8 - -forStatement -: FOR forItem forOrdinalityOrOffset?
-; - -forItem -: forItemAlias forItemSource -; - -forItemAlias -: bindingVariable IN -; - -forItemSource -: valueExpression -; - -forOrdinalityOrOffset -: WITH (ORDINALITY | OFFSET) bindingVariable -; - -// 14.9 - -orderByAndPageStatement -: orderByClause offsetClause? limitClause? -| offsetClause limitClause? -| limitClause -; - -// 14.10 - -primitiveResultStatement -: returnStatement orderByAndPageStatement? -| FINISH -; - -// 14.11 - -returnStatement -: RETURN returnStatementBody -; - -returnStatementBody -: setQuantifier? (ASTERISK | returnItemList) groupByClause? -| NO BINDINGS -; - -returnItemList -: returnItem (COMMA returnItem)* -; - -returnItem -: aggregatingValueExpression returnItemAlias? -; - -returnItemAlias -: AS identifier -; - -// 14.12