From: Thomas Walker Lynch Date: Thu, 1 Aug 2024 08:22:35 +0000 (+0000) Subject: lector -> documentation to alleviate confusion among first timers X-Git-Url: https://git.reasoningtechnology.com/usr/lib/python2.7/sre_parse.py?a=commitdiff_plain;h=9676433fb72eb99424f3ce7e62394a0f5b2f1a40;p=GQL-to-Cypher lector -> documentation to alleviate confusion among first timers --- diff --git a/developer/documentation/.githolder b/developer/documentation/.githolder new file mode 100644 index 0000000..e69de29 diff --git a/developer/documentation/Cypher_GQL_differences.txt b/developer/documentation/Cypher_GQL_differences.txt new file mode 100644 index 0000000..227a5bf --- /dev/null +++ b/developer/documentation/Cypher_GQL_differences.txt @@ -0,0 +1,62 @@ +Seems to me, that creating a disjoint sub graph is identical to creating a new graph. Perhaps we can work with that. Why don't we create a master index of disjoint subgraphs in a header, then when a new graph is made we will add an entry in the index. +ChatGPT +Creating a master index of disjoint subgraphs and managing them within a single Neo4j database is a practical approach. By using labels or properties to differentiate subgraphs, you can effectively manage multiple logical graphs within a single physical graph. + + Here's a step-by-step approach to implement this: + + Step 1: Create the Master Index + First, create a master index node that will store references to all disjoint subgraphs. + + cypher + + MERGE (index:GraphIndex {name: 'MasterIndex'}) + Step 2: Create a New Graph + When creating a new graph, add an entry to the master index. 
+ + cypher + + // Create a new graph and add an entry to the master index + MERGE (index:GraphIndex {name: 'MasterIndex'}) + CREATE (graph:Graph {name: 'myNewGraph'}) + MERGE (index)-[:CONTAINS]->(graph) + + // Create nodes and relationships within the new graph + CREATE (graph)-[:HAS_NODE]->(:Person:myNewGraph {name: 'Alice'}) + CREATE (graph)-[:HAS_NODE]->(:Person:myNewGraph {name: 'Bob'}) + Step 3: Querying Subgraphs + To query a specific subgraph, use the label or properties associated with that subgraph. + + cypher + + MATCH (index:GraphIndex {name: 'MasterIndex'})-[:CONTAINS]->(graph:Graph {name: 'myNewGraph'})-[:HAS_NODE]->(person:Person:myNewGraph) + RETURN person.name + Step 4: Adding More Nodes and Relationships to a Subgraph + To add more nodes and relationships to an existing subgraph, refer to the subgraph by its label or properties. + + cypher + + MATCH (graph:Graph {name: 'myNewGraph'}) + CREATE (graph)-[:HAS_NODE]->(:Person:myNewGraph {name: 'Charlie'}) + CREATE (person:Person:myNewGraph {name: 'Alice'})-[:KNOWS]->(person:Person:myNewGraph {name: 'Charlie'}) + Example: Full Workflow + Creating a New Graph + cypher + + MERGE (index:GraphIndex {name: 'MasterIndex'}) + CREATE (graph:Graph {name: 'myNewGraph'}) + MERGE (index)-[:CONTAINS]->(graph) + CREATE (graph)-[:HAS_NODE]->(:Person:myNewGraph {name: 'Alice'}) + CREATE (graph)-[:HAS_NODE]->(:Person:myNewGraph {name: 'Bob'}) + Querying a Subgraph + cypher + + MATCH (index:GraphIndex {name: 'MasterIndex'})-[:CONTAINS]->(graph:Graph {name: 'myNewGraph'})-[:HAS_NODE]->(person:Person:myNewGraph) + RETURN person.name + Adding to a Subgraph + cypher + + MATCH (graph:Graph {name: 'myNewGraph'}) + CREATE (graph)-[:HAS_NODE]->(:Person:myNewGraph {name: 'Charlie'}) + MATCH (alice:Person:myNewGraph {name: 'Alice'}) + MATCH (charlie:Person:myNewGraph {name: 'Charlie'}) + CREATE (alice)-[:KNOWS]->(charlie) diff --git a/developer/javac/PrintVisitor.java b/developer/javac/PrintVisitor.java new file mode 100644 index 
0000000..8146f4a --- /dev/null +++ b/developer/javac/PrintVisitor.java @@ -0,0 +1,31 @@ +import org.antlr.v4.runtime.tree.*; +import org.antlr.v4.runtime.RuleContext; + +public class PrintVisitor extends AbstractParseTreeVisitor implements GQL_20240412Visitor { + private final String[] ruleNames; + + public PrintVisitor(String[] ruleNames) { + this.ruleNames = ruleNames; + } + + @Override + public String visit(ParseTree tree) { + if (tree instanceof TerminalNode) { + return tree.getText(); + } + + StringBuilder sb = new StringBuilder(); + String ruleName = ruleNames[((RuleContext) tree).getRuleIndex()]; + sb.append(ruleName).append("("); + + for (int i = 0; i < tree.getChildCount(); i++) { + sb.append(visit(tree.getChild(i))); + if (i < tree.getChildCount() - 1) { + sb.append(" "); + } + } + + sb.append(")"); + return sb.toString(); + } +} diff --git a/developer/javac/SyntaxTree20240412.java b/developer/javac/SyntaxTree20240412.java new file mode 100644 index 0000000..a8db135 --- /dev/null +++ b/developer/javac/SyntaxTree20240412.java @@ -0,0 +1,36 @@ +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.tree.*; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; + +public class SyntaxTree20240412 { + + // Versioned Lexer and Parser + static final Class GQL_Lexer = GQL_20240412Lexer.class; + static final Class GQL_Parser = GQL_20240412Parser.class; + + public static void main(String[] args) throws IOException { + if (args.length != 1) { + System.err.println("Usage: java SyntaxTree20240412 "); + System.exit(1); + } + + String inputFile = args[0]; + String input = new String(Files.readAllBytes(Paths.get(inputFile))); + + try { + Lexer lexer = (Lexer) GQL_Lexer.getConstructor(CharStream.class).newInstance(CharStreams.fromString(input)); + CommonTokenStream tokens = new CommonTokenStream(lexer); + Parser parser = (Parser) GQL_Parser.getConstructor(TokenStream.class).newInstance(tokens); + Method startRule = 
parser.getClass().getMethod("program"); // Assuming 'program' is the start rule + ParseTree tree = (ParseTree) startRule.invoke(parser); + + PrintVisitor visitor = new PrintVisitor(parser.getRuleNames()); + String syntaxTree = visitor.visit(tree); + System.out.println(syntaxTree); + } catch (Exception e) { + e.printStackTrace(); + } + } +} diff --git a/developer/lector/.githolder b/developer/lector/.githolder deleted file mode 100644 index e69de29..0000000 diff --git a/developer/lector/Cypher_GQL_differences.txt b/developer/lector/Cypher_GQL_differences.txt deleted file mode 100644 index 227a5bf..0000000 --- a/developer/lector/Cypher_GQL_differences.txt +++ /dev/null @@ -1,62 +0,0 @@ -Seems to me, that creating a disjoint sub graph is identical to creating a new graph. Perhaps we can work with that. Why don't we create a master index of disjoint subgraphs in a header, then when a new graph is made we will add an entry in the index. -ChatGPT -Creating a master index of disjoint subgraphs and managing them within a single Neo4j database is a practical approach. By using labels or properties to differentiate subgraphs, you can effectively manage multiple logical graphs within a single physical graph. - - Here's a step-by-step approach to implement this: - - Step 1: Create the Master Index - First, create a master index node that will store references to all disjoint subgraphs. - - cypher - - MERGE (index:GraphIndex {name: 'MasterIndex'}) - Step 2: Create a New Graph - When creating a new graph, add an entry to the master index. 
- - cypher - - // Create a new graph and add an entry to the master index - MERGE (index:GraphIndex {name: 'MasterIndex'}) - CREATE (graph:Graph {name: 'myNewGraph'}) - MERGE (index)-[:CONTAINS]->(graph) - - // Create nodes and relationships within the new graph - CREATE (graph)-[:HAS_NODE]->(:Person:myNewGraph {name: 'Alice'}) - CREATE (graph)-[:HAS_NODE]->(:Person:myNewGraph {name: 'Bob'}) - Step 3: Querying Subgraphs - To query a specific subgraph, use the label or properties associated with that subgraph. - - cypher - - MATCH (index:GraphIndex {name: 'MasterIndex'})-[:CONTAINS]->(graph:Graph {name: 'myNewGraph'})-[:HAS_NODE]->(person:Person:myNewGraph) - RETURN person.name - Step 4: Adding More Nodes and Relationships to a Subgraph - To add more nodes and relationships to an existing subgraph, refer to the subgraph by its label or properties. - - cypher - - MATCH (graph:Graph {name: 'myNewGraph'}) - CREATE (graph)-[:HAS_NODE]->(:Person:myNewGraph {name: 'Charlie'}) - CREATE (person:Person:myNewGraph {name: 'Alice'})-[:KNOWS]->(person:Person:myNewGraph {name: 'Charlie'}) - Example: Full Workflow - Creating a New Graph - cypher - - MERGE (index:GraphIndex {name: 'MasterIndex'}) - CREATE (graph:Graph {name: 'myNewGraph'}) - MERGE (index)-[:CONTAINS]->(graph) - CREATE (graph)-[:HAS_NODE]->(:Person:myNewGraph {name: 'Alice'}) - CREATE (graph)-[:HAS_NODE]->(:Person:myNewGraph {name: 'Bob'}) - Querying a Subgraph - cypher - - MATCH (index:GraphIndex {name: 'MasterIndex'})-[:CONTAINS]->(graph:Graph {name: 'myNewGraph'})-[:HAS_NODE]->(person:Person:myNewGraph) - RETURN person.name - Adding to a Subgraph - cypher - - MATCH (graph:Graph {name: 'myNewGraph'}) - CREATE (graph)-[:HAS_NODE]->(:Person:myNewGraph {name: 'Charlie'}) - MATCH (alice:Person:myNewGraph {name: 'Alice'}) - MATCH (charlie:Person:myNewGraph {name: 'Charlie'}) - CREATE (alice)-[:KNOWS]->(charlie) diff --git a/developer/makefile b/developer/makefile index 090dcbe..ebb203e 100644 --- a/developer/makefile 
+++ b/developer/makefile @@ -9,6 +9,7 @@ ISLAND = land island # JAVA_HOME, CLASSPATH, and ANTLR_JAR variables come from `env_dev` + JAVA_COMP = $(JAVA_HOME)/bin/javac JAVA_INTERP = $(JAVA_HOME)/bin/java JAR = $(JAVA_HOME)/bin/jar @@ -20,6 +21,8 @@ ANTLR_OUT_DIR_DIR = javac ANTLR_OUT_DIR = javac/ANTLR ANTLR_IN_FL = $(wildcard $(ANTLR_IN_DIR)/*.g4) ANTLR_OUT_FL = $(patsubst $(ANTLR_IN_DIR)/%.g4,$(ANTLR_OUT_DIR)/%.java,$(ANTLR_IN_FL)) +CLASSPATH_LOCAL := $(CLASSPATH):$(ANTLR_OUT_DIR) + JAVA_IN_DIR = javac JVM_IN_DIR = jvm @@ -34,9 +37,9 @@ JAVA_OUT_FL = $(JAVA_OUT_FL0) $(JAVA_OUT_FL1) # programs we are creating -GQL_SYNTAX_TREE = GQL_syntax_tree +SYNTAX_TREE_20240412 = SyntaxTree20240412 #... -OUT_FL = $(JVM_IN_DIR)/$(GQL_SYNTAX_TREE).jar +OUT_FL = $(JVM_IN_DIR)/$(SYNTAX_TREE_20240412).jar .PHONY: all all: grammar program @@ -63,6 +66,7 @@ variable: $(info ANTLR_OUT_DIR_DIR is '$(ANTLR_OUT_DIR_DIR)') $(info ANTLR_IN_FL is '$(ANTLR_IN_FL)') $(info ANTLR_OUT_FL is '$(ANTLR_OUT_FL)') + $(info CLASSPATH_LOCAL is '$(CLASSPATH_LOCAL)') $(info JAVA_IN_DIR is '$(JAVA_IN_DIR)') $(info JVM_IN_DIR is '$(JVM_IN_DIR)') @@ -121,12 +125,9 @@ clean: $(ANTLR_OUT_DIR)/%.java: $(ANTLR_IN_DIR)/%.g4 $(JAVA_INTERP) -jar $(ANTLR_JAR) -Dlanguage=Java -o $(ANTLR_OUT_DIR_DIR) $< -$(JAVA_OUT_FL0): $(JVM_IN_DIR)/%.class: $(JAVA_IN_DIR)/%.java - $(JAVA_COMP) -d $(JVM_IN_DIR) -cp $(ANTLR_JAR) $< - -$(JAVA_OUT_FL1): $(JVM_IN_DIR)/%.class: $(ANTLR_OUT_DIR)/%.java - $(JAVA_COMP) -d $(JVM_IN_DIR) -cp $(ANTLR_JAR) $< +$(JVM_IN_DIR)/%.class: $(JAVA_IN_DIR)/%.java $(ANTLR_OUT_DIR)/%.java + $(JAVA_COMP) -d $(JVM_IN_DIR) -cp $(CLASSPATH_LOCAL) $< -$(JVM_IN_DIR)/$(GQL_SYNTAX_TREE).jar: $(JAVA_OUT_FL) +$(JVM_IN_DIR)/$(SYNTAX_TREE_20240412).jar: $(JAVA_OUT_FL) $(JAR) cvf $@ -C $(JVM_IN_DIR) . 
diff --git a/documentation/README.md b/documentation/README.md new file mode 100644 index 0000000..21b4176 --- /dev/null +++ b/documentation/README.md @@ -0,0 +1,7 @@ + +This directory contains project management documentation. Find here information +about the directory structure, tools used, and installation required for setting +up the environment. + +The `documentation` directory under `developer` has documentation for the code +being developed. diff --git a/documentation/directory_structure.md b/documentation/directory_structure.md new file mode 100644 index 0000000..8755bcb --- /dev/null +++ b/documentation/directory_structure.md @@ -0,0 +1,105 @@ +**directory_structure.md** + +# Property-Based Directory Naming + +We think of a directory name as being the value of a property, where this +property is shared by each and every file in said directory. + +When developing programs, we use a number of tools that process files. In this +context, these are common property values to think of files having in common: + +* `processed_by` +* `category_processed_by` + +The `processed_by` property is also known as `input`, which is often shortened +to `in`. For example, C files to be compiled are processed by `cc`, so `cc` is a +fine name for the directory. Other names could be `processed_by_cc`, `cc_input`, +or `cc_in`. + +When input files destined to be processed by more than one related program are +mixed in a directory, we generalize to the category. For example, a directory +holding a collection of both Rust and C files could be called `compiler`, where +`compiler` is the property that the related processing programs have. A mix of +interpreted scripts destined to be processed by different interpreters could be +`interpreter`. + +Files directly loaded and run by the machine are `processed_by` the architecture +(e.g., `x86`) or `category_processed_by` a `machine`. + +An interpreter creates machine code on-the-fly. 
Calling an interpreter is the +final step in a development chain, similar to calling the loader for a machine +file. So, let us call the loader or the interpreter an `executor`. Thus, files +in such a mixed directory are to be processed by an executor. + +The processing of files in a collection can be done by a person. Such is the +case of the directory used by developers when they work, hence such a directory +is called `developer`. + +We run into a bit of a problem when there is no word in English for the role a +person plays when processing files in a directory. Such is the case for a +directory full of documentation. The term `reader` comes close, but when you +think about it, everything is a reader. For example, a compiler reads its input +files. I thought to appeal to Latin and use `lector`, which indeed would refer +to a person. However, the term is not intuitive, and people who cannot find this +documentation will not be reading it so as to understand this term. + +In such a situation, we can move on to other property values shared by the files +in the directory: + +* `purpose_is` +* `is_a` +* `is X` +* `owned_by` + +Each file in a documentation directory shares the purpose of being +documentation, hence such directories are called `documentation`. + + +# Implicit Property Name + +The property used when naming a directory is implicit, so coming up with a +directory name is essentially a mental exercise. The reader of the directory +name is left to figure out the shared property value the person had in mind when +naming the directory. + +Adopting a convention makes the task of deriving the property name easier, but +if this is still not sufficient, then integrate the property name into the +directory name along with the common value. `is_a_red_car` might be full of +files that are describing red cars. This example enters the gray area between +file systems and databases. 
+ +Sometimes a directory name will be held in a variable in a script, and we need +to further distinguish the fact that the variable is referring to a +directory. For example, a variable that holds the name of the directory that +holds files to be processed by `javac` can be called `javac_input_directory`, or +for those who want to be swank, `javac_in_dir`. + +# Why Directory Names Are Often Singular + +Each and every file in a directory has the same property value for a given +property. Hence, this property value comes from the property of a file, +a single object. + +For example, if a directory is named `compiler`, it signifies that each file in +the directory is `processed_by` a `compiler`. + +However, there can be cases where the property value, though it comes from an +individual file, will be plural. For example, a directory could conceivably be +named `insights`, where each file in the directory contains multiple +insights. When we look at the property value for a single file, it is plural. + +# Hidden Files + +In my humble opinion, that git uses hidden files is unfortunate. In general, I +find hidden files in shared projects to be a bad idea. The approach of using a +symbolic link to give hidden files visible names creates a maintenance +problem. The idea of setting explicit parameters for git can be used to force +git to not create hidden files, but this approach does not expose other hidden +files that might be placed into a project repository. There is also an issue of +coordinating with other team members who want hidden files. Hence, a general +solution that solves all these issues is to alias `ls` to `ls -a`, which is done +in the environment initialization file for the project. + + + diff --git a/documentation/for_developers.md b/documentation/for_developers.md new file mode 100644 index 0000000..c702f7f --- /dev/null +++ b/documentation/for_developers.md @@ -0,0 +1,17 @@ + + +The work area for developers is the `developer` directory. 
All other subdirectories +and files found at the top level are for project management. + +The best way to set up the environment and to enter the `developer` directory is +to use the `repo` command found in RT's `resource` project. The `repo` command +will start a new shell with the proper environment variables set up for the +project, and nothing else. + +A project can also be entered by sourcing `env_dev` by running the command +`. executor/env_dev` in a shell. `use_tool` is analogous to `activate` in Python. + + + + + diff --git a/documentation/log.md b/documentation/log.md new file mode 100644 index 0000000..9df41ca --- /dev/null +++ b/documentation/log.md @@ -0,0 +1,10 @@ +This is a project management log. This is not a code development log. (Git does +a pretty good job of that already.) + +2024-07-25 + formal git project directory creation + + Preparation, studies, and discussion occur before this time. + + + diff --git a/executor/env_dev b/executor/env_dev index 5c0627b..67c402b 100644 --- a/executor/env_dev +++ b/executor/env_dev @@ -12,11 +12,11 @@ if [ -z "$REPO_HOME" ]; then source "${script_path}/env_base" fi -export JAVA_HOME="$REPO_HOME/tool/jdk-22.0.1+8" -export CLASSPATH=".:$REPO_HOME/tool/executor/antlr-4.11.1-complete.jar:$CLASSPATH" export ANTLR_JAR="$REPO_HOME/tool/executor/antlr-4.11.1-complete.jar" +export JAVA_HOME="$REPO_HOME/tool/jdk-22.0.1+8" +export CLASSPATH="$ANTLR_JAR" -export PATH="$JAVA_HOME/bin:$REPO_HOME/tool/executor:$PATH" +export PATH="$REPO_HOME/tool/executor:$JAVA_HOME/bin:$PATH" alias ls="ls -a" cd "$REPO_HOME/developer" diff --git a/lector/README.md b/lector/README.md deleted file mode 100644 index 21b4176..0000000 --- a/lector/README.md +++ /dev/null @@ -1,7 +0,0 @@ - -This directory contains project management documentation. Find here information -about the directory structure, tools used, and installation required for setting -up the environment. 
- -The `lector` directory under `developer` has documentation for the code -being developed. diff --git a/lector/directory_structure.md b/lector/directory_structure.md deleted file mode 100644 index c203397..0000000 --- a/lector/directory_structure.md +++ /dev/null @@ -1,90 +0,0 @@ - -# Property-Based Directory Naming - -We think of a directory name as being the value of a property, where -this property is shared by each and every file in said directory. -These are the properties, in order that we prefer to use them: - -* `processed_by` -* `category_processed_by` -* `purpose_of` - -The `processed_by` property is also known as `input`, which in turn is -often shortened to `in`. - - - -When source files of different types are mixed in a directory, we generalize -to the category. A mix of Rust and C files would be called `compiler` as the -category of the programs used to process the files. A mix of interpreted scripts -would be `interpreter`. - -When a directory contains mixed source files, we generalize to the category. A -directory with Rust and C files is named `compiler`, indicating the category of -programs used to process the files. A directory of interpreted scripts is named -`interpreter`. - -Files directly loaded and run by the machine are `processed_by` the architecture -(e.g., `x86`) or `category_processed_by` the `machine`. - -An interpreter creates machine code on-the-fly. Calling the interpreter is the -final step, similar calling the loader for a machine file. So we use a category -term of 'exector' for both an interpreter and a loader, so a directory -containing both interpreted files and machine files is named `executor`. - -A document is read by a person, so a document is processed by a `reader`. - -In cases where knowing the agent that will process the file does not give the -user insight as to what the file is for, we drop down to the `purpose_of` -property. 
- -# Implicit Property Name - -When naming a directory we typically don't add the property name, but only -put its value. Hence the directory that contains files to be processed -by the `javac` program will typically be called `javac`. When we need to include the -property name, we can add a suffix. `javac_input`. - -Sometimes a directory name will be held in a variable, and we need to further -distinguish the fact that the variable is referring to a directory. Say for -example a variable that holds the name of the directory that holds files -to be processed by `javac`, can be called `javac_input_directory`, and if -that is too long, `javac_in_dir` is typically used. - - -# Why directory names are often singular - -Each and every file in a directory has the same property value -for a given property. This property value becomes the name of the directory. -This creates an equivalence class of files based on the shared -property. - -For example, if a directory is named ‘compiler’, it signifies that -each file in the directory is processed by a compiler. - -However, there can be cases where the property value, though it comes from an -individual file, will be plural. For example, a directory could conceivably be -named ‘insights’, where each file in the directory contains multiple insights. -When we look at the property value for a single file, it is plural. - -# What is 'lector' - -It is a stuffy way to say `processed_by` the `reader`. At the top level directory -the lector is typically the project manager. In the developer's directory it is -typically the developer. In general it is anyone who reads the docs. - -# Hidden Files - -In my humble opinion that git uses hidden files is unfortunate. In general I -find hidden files in shared projects to be a bad idea. The approach of using a -symbolic link to give hidden files visible names creates a maintenance -problem. 
The idea of setting explicit parameters for git can be used to -address the git hidden files, but it does not expose other hidden files. -There is also an issue of coordinating with other team members who want -hidden files. Hence, a general solution that solves all these issues is -to alias `ls` to `ls -a`, which is done in the initialization file -for the project. - - - diff --git a/lector/for_developers.md b/lector/for_developers.md deleted file mode 100644 index c702f7f..0000000 --- a/lector/for_developers.md +++ /dev/null @@ -1,17 +0,0 @@ - - -The work area for developers is the `developer` directory. All other subdirectories -and files found at the top level are for project management. - -The best way to setup the environment and to enter the `developer` directory is -to use the `repo` command found in RT's `resource` project. The `repo` command -will start a new shell with the proper environment variables setup for the -project, and nothing else. - -A project can also be entered by sourcing `env_dev` by running the command -`. exector/env_dev` in a shell. `use_tool` is analogous to `activate` in Python. - - - - - diff --git a/lector/log.md b/lector/log.md deleted file mode 100644 index 9df41ca..0000000 --- a/lector/log.md +++ /dev/null @@ -1,10 +0,0 @@ -This is a project management log. This is not a code development log. (Git does -a pretty good job of that already.) - -2024-07-25 - formal git project directory creation - - Preparation, studies, and discussion occur before this time. - - -