Skip to main content
Version: Next

cars

// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// Example 1: Cars
// Learning goals:
// - Understand the core concepts pipeline, block, and pipe
// - Understand the general structure of a pipeline

// 1. This Jayvee model describes a pipeline from a CSV file on the web to a
// SQLite file sink.
pipeline CarsPipeline {

  // 2. The structure of the pipeline comes first: blocks are connected to
  // each other via pipes, usually at the top of the pipeline body.

  // 3. Syntax of a single pipe:
  // the arrow links the block CarsExtractor
  // to the block CarsTextFileInterpreter.
  CarsExtractor
    -> CarsTextFileInterpreter;

  // 4. Whatever the block before the arrow outputs becomes the input of the
  // block after the arrow.

  // 5. Several pipes can be chained in one statement,
  // which gives an overview of the rest of the pipeline.
  CarsTextFileInterpreter
    -> CarsCSVInterpreter
    -> NameHeaderWriter
    -> CarsTableInterpreter
    -> CarsLoader;


  // 6. The blocks that the pipes connect are usually defined below the pipes.

  // 7. A block instantiates a block type by using the oftype keyword.
  // The block type determines which properties are available to configure
  // the intended behavior of the block.
  block CarsExtractor oftype HttpExtractor {

    // 8. Properties are assigned concrete values.
    // This one specifies the URL the file shall be downloaded from.
    url: "https://gist.githubusercontent.com/noamross/e5d3e859aa0c794be10b/raw/b999fb4425b54c63cab088c0ce2c0d6ce961a563/cars.csv";
  }

  // 9. The HttpExtractor requires no input and produces a binary file as
  // output. That file still has to be interpreted, e.g., as a text file.
  block CarsTextFileInterpreter oftype TextFileInterpreter { }

  // 10. Next, the text file is interpreted as a sheet.
  // A sheet contains only text cells, which makes it useful for reshaping
  // the data before stricter value types are assigned to the cells.
  block CarsCSVInterpreter oftype CSVInterpreter {
    enclosing: '"';
  }

  // 11. The CellWriter block type writes into cells of a sheet.
  block NameHeaderWriter oftype CellWriter {
    // 12. Cells are selected with a syntax similar to spreadsheet programs.
    // The keywords "cell", "row", "column", or "range" indicate which cells
    // are selected for the write action.
    at: cell A1;

    // 13. For each cell selected by the "at" property above, we specify the
    // value that shall be written into it.
    write: ["name"];
  }

  // 14. Next, we define the schema of our table.
  // We use a value type in which each property corresponds to one column of
  // the table. The column gets the same value type as the property.
  valuetype Car {
    property name oftype text;
    property mpg oftype decimal;
    property cyl oftype integer;
    property disp oftype decimal;
    property hp oftype integer;
    property drat oftype decimal;
    property wt oftype decimal;
    property qsec oftype decimal;
    property vs oftype integer;
    property am oftype integer;
    property gear oftype integer;
    property carb oftype integer;
  }

  // The transform CarParser maps a collection of text cells (presumably one
  // row of the sheet) to a Car. Each cell is looked up with cellInColumn,
  // either by header name or by numeric column index, and converted with
  // asText, asDecimal, or asInteger.
  // NOTE(review): the indices 2 and 3 appear to be zero-based (cyl and disp
  // are the third and fourth properties of Car) - confirm against the docs.
  transform CarParser {
    from r oftype Collection<text>;
    to car oftype Car;

    car: {
      name: asText (r cellInColumn "name"),
      mpg: asDecimal (r cellInColumn "mpg"),
      cyl: asInteger (r cellInColumn 2),
      disp: asDecimal (r cellInColumn 3),
      hp: asInteger (r cellInColumn "hp"),
      drat: asDecimal (r cellInColumn "drat"),
      wt: asDecimal (r cellInColumn "wt"),
      qsec: asDecimal (r cellInColumn "qsec"),
      vs: asInteger (r cellInColumn "vs"),
      am: asInteger (r cellInColumn "am"),
      gear: asInteger (r cellInColumn "gear"),
      carb: asInteger (r cellInColumn "carb")
    };
  }

  // 15. As a next step, the sheet is interpreted as a table, using the value
  // type defined above and the CarParser transform. Rows that include values
  // that are not valid according to their value types are dropped
  // automatically.
  block CarsTableInterpreter oftype TableInterpreter {
    header: true;
    columns: Car;
    parseWith: CarParser;
  }

  // 16. As a last step, the table is loaded into a sink, here a SQLite file.
  // The structural information of the table is used to generate the correct
  // table.
  block CarsLoader oftype SQLiteLoader {
    table: "Cars";
    file: "./cars.sqlite";
  }

  // 17. Congratulations!
  // You can now use the sink for your data analysis, app, or whatever you
  // want to do with the cleaned data.
}