Skip to main content
Version: 0.4.0

cars

// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
//
// SPDX-License-Identifier: AGPL-3.0-only

// Example 1: Cars
// Learning goals:
// - Understand the core concepts pipeline, block, and pipe
// - Understand the general structure of a pipeline

// 1. This Jayvee model describes a pipeline
// from a CSV file on the web
// to a SQLite file sink.
pipeline CarsPipeline {

    // 2. The structure of the pipeline is described by connecting
    //    blocks via pipes, usually at the top of the pipeline.

    // 3. Syntax of a single pipe: this one connects
    //    the block CarsExtractor with the block CarsTextFileInterpreter.
    CarsExtractor -> CarsTextFileInterpreter;

    // 4. The output of the preceding block thereby becomes
    //    the input of the succeeding block.

    // 5. Pipes can be chained further, which gives
    //    an overview of the whole pipeline.
    CarsTextFileInterpreter
        -> CarsCSVInterpreter
        -> NameHeaderWriter
        -> CarsTableInterpreter
        -> CarsLoader;


    // 6. The blocks connected by the pipes are usually
    //    defined below the pipes.

    // 7. A block instantiates a blocktype via the oftype keyword.
    //    The blocktype determines which properties the block may set
    //    in order to specify its intended behavior.
    block CarsExtractor oftype HttpExtractor {

        // 8. Properties are assigned concrete values.
        //    This one is the URL the file shall be downloaded from.
        url: "https://gist.githubusercontent.com/noamross/e5d3e859aa0c794be10b/raw/b999fb4425b54c63cab088c0ce2c0d6ce961a563/cars.csv";
    }

    // 9. The HttpExtractor requires no input and produces a binary file as output.
    //    That file still has to be interpreted, e.g., as a text file.
    block CarsTextFileInterpreter oftype TextFileInterpreter { }

    // 10. Next, the text file is interpreted as a sheet.
    //     A sheet contains only text cells, which makes it useful for reshaping
    //     the data before stricter value types are assigned to the cells.
    block CarsCSVInterpreter oftype CSVInterpreter {
        enclosing: '"';
    }

    // 11. The CellWriter blocktype writes into cells of a sheet.
    block NameHeaderWriter oftype CellWriter {
        // 12. The syntax resembles that of spreadsheet programs.
        //     The keywords "cell", "row", "column", or "range" describe
        //     which cells are selected for the write action.
        at: cell A1;

        // 13. For each cell selected by the "at" property above,
        //     this specifies the value that shall be written into it.
        write: ["name"];
    }

    // 14. As a next step, the sheet is interpreted as a table by adding structure.
    //     Each column gets a valuetype that specifies the data type of the column.
    //     Rows containing values that are not valid according to their valuetypes
    //     are dropped automatically.
    block CarsTableInterpreter oftype TableInterpreter {
        header: true;
        columns: [
            "name" oftype text,
            "mpg" oftype decimal,
            "cyl" oftype integer,
            "disp" oftype decimal,
            "hp" oftype integer,
            "drat" oftype decimal,
            "wt" oftype decimal,
            "qsec" oftype decimal,
            "vs" oftype integer,
            "am" oftype integer,
            "gear" oftype integer,
            "carb" oftype integer
        ];
    }

    // 15. As a last step, the table is loaded into a sink,
    //     here a SQLite file.
    //     The structural information of the table is used
    //     to generate the correct table.
    block CarsLoader oftype SQLiteLoader {
        table: "Cars";
        file: "./cars.sqlite";
    }

    // 16. Congratulations!
    //     You can now use the sink for your data analysis, app,
    //     or whatever you want to do with the cleaned data.
}