From 4c25be57530278d00d6b132adca428f5876eef3d Mon Sep 17 00:00:00 2001 From: Georg Schwarz Date: Mon, 13 May 2024 10:44:59 +0200 Subject: [PATCH 1/9] Add custom Jayvee formatter Co-authored-by: joluj --- example/cars.jv | 11 ++- example/electric-vehicles.jv | 94 ++++++++++++++----- example/gtfs-rt.jv | 92 +++++++++--------- example/gtfs-static.jv | 77 ++++++++++----- example/workbooks-xlsx.jv | 16 +++- libs/language-server/src/lib/jayvee-module.ts | 2 + .../language-server/src/lib/services/index.ts | 1 + .../src/lib/services/jayvee-formatter.ts | 64 +++++++++++++ 8 files changed, 259 insertions(+), 98 deletions(-) create mode 100644 libs/language-server/src/lib/services/jayvee-formatter.ts diff --git a/example/cars.jv b/example/cars.jv index f4fbb25d..a5bed357 100644 --- a/example/cars.jv +++ b/example/cars.jv @@ -19,7 +19,8 @@ pipeline CarsPipeline { // 3. Syntax of a pipe // connecting the block CarsExtractor // with the block CarsTextFileInterpreter. - CarsExtractor -> CarsTextFileInterpreter; + CarsExtractor + -> CarsTextFileInterpreter; // 4. The output of the preceding block is hereby used // as input for the succeeding block. @@ -27,9 +28,9 @@ pipeline CarsPipeline { // 5. Pipes can be further chained, // leading to an overview of the pipeline. CarsTextFileInterpreter - -> CarsCSVInterpreter + -> CarsCSVInterpreter -> NameHeaderWriter - -> CarsTableInterpreter + -> CarsTableInterpreter -> CarsLoader; @@ -65,7 +66,9 @@ pipeline CarsPipeline { // 13. For each cell we selected with the "at" property above, // we can specify what value shall be written into the cell. - write: ["name"]; + write: [ + "name" + ]; } // 14. As a next step, we interpret the sheet as a table by adding structure. diff --git a/example/electric-vehicles.jv b/example/electric-vehicles.jv index 154d69d9..b57b4ea0 100644 --- a/example/electric-vehicles.jv +++ b/example/electric-vehicles.jv @@ -21,15 +21,15 @@ pipeline ElectricVehiclesPipeline { // of blocks. From there we can see a split into two // parallel sequences that load the data in to two // different sinks. - ElectricVehiclesHttpExtractor + ElectricVehiclesHttpExtractor -> ElectricVehiclesTextFileInterpreter -> ElectricVehiclesCSVInterpreter -> ElectricVehiclesTableInterpreter -> ElectricRangeTransformer; - + ElectricRangeTransformer -> ElectricVehiclesSQLiteLoader; - + ElectricRangeTransformer -> ElectricVehiclesPostgresLoader; @@ -48,7 +48,7 @@ pipeline ElectricVehiclesPipeline { // 4. Here, a user-deifned value type is used to describe this column. // The capital letter indicates that the value type is not built-in // by convention. The value type itself is defined further below. - "VIN (1-10)" oftype VehicleIdentificationNumber10, + "VIN (1-10)" oftype VehicleIdentificationNumber10, "County" oftype text, "City" oftype text, "State" oftype UsStateCode, @@ -65,7 +65,7 @@ pipeline ElectricVehiclesPipeline { "Vehicle Location" oftype text, "Electric Utility" oftype text, "2020 Census Tract" oftype text, - ]; + ]; } // 5. This block describes the application of a transform function @@ -73,7 +73,9 @@ pipeline ElectricVehiclesPipeline { // The applied transform function is defined below and referenced // by the "use" property. block ElectricRangeTransformer oftype TableTransformer { - inputColumns: ["Electric Range"]; + inputColumns: [ + "Electric Range" + ]; outputColumn: "Electric Range (km)"; use: MilesToKilometers; } @@ -112,7 +114,8 @@ pipeline ElectricVehiclesPipeline { // that this value type builts on. User-defined value types always place additional constraints on existing value types. valuetype VehicleIdentificationNumber10 oftype text { // 10. Value types can be further refined by providing constraints. - constraints: [ + constraints: + [ OnlyCapitalLettersAndDigits, ExactlyTenCharacters, ]; @@ -120,24 +123,73 @@ valuetype VehicleIdentificationNumber10 oftype text { // 11. This constraint works on text value types and requires values // to match a given regular expression in order to be valid. -constraint OnlyCapitalLettersAndDigits on text: - value matches /^[A-Z0-9]*$/; +constraint OnlyCapitalLettersAndDigits on text: value matches /^[A-Z0-9]*$/; -constraint ExactlyTenCharacters on text: - value.length == 10; +constraint ExactlyTenCharacters on text: value.length == 10; valuetype UsStateCode oftype text { - constraints: [ + constraints: + [ UsStateCodeAllowlist, ]; } -constraint UsStateCodeAllowlist on text: - value in [ - "AL", "AK", "AZ", "AR", "AS", "CA", "CO", "CT", "DE", "DC", - "FL", "GA", "GU", "HI", "ID", "IL", "IN", "IA", "KS", "KY", - "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", - "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "MP", "OH", "OK", - "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "TT", "UT", - "VT", "VA", "VI", "WA", "WV", "WI", "WY", - ]; +constraint UsStateCodeAllowlist on text: value in [ + "AL", + "AK", + "AZ", + "AR", + "AS", + "CA", + "CO", + "CT", + "DE", + "DC", + "FL", + "GA", + "GU", + "HI", + "ID", + "IL", + "IN", + "IA", + "KS", + "KY", + "LA", + "ME", + "MD", + "MA", + "MI", + "MN", + "MS", + "MO", + "MT", + "NE", + "NV", + "NH", + "NJ", + "NM", + "NY", + "NC", + "ND", + "MP", + "OH", + "OK", + "OR", + "PA", + "PR", + "RI", + "SC", + "SD", + "TN", + "TX", + "TT", + "UT", + "VT", + "VA", + "VI", + "WA", + "WV", + "WI", + "WY", +]; diff --git a/example/gtfs-rt.jv b/example/gtfs-rt.jv index adc6aec6..76619423 100644 --- a/example/gtfs-rt.jv +++ b/example/gtfs-rt.jv @@ -18,30 +18,30 @@ pipeline GtfsRTSimplePipeline { ->GtfsRTTripUpdateInterpreter ->TripUpdateTableInterpreter ->TripUpdateLoader; - + GTFSRTVehiclePositionFeedExtractor ->GtfsRTVehiclePositionInterpreter ->VehiclePositionTableInterpreter ->VehicleLoader; - + GTFSRTAlertFeedExtractor - ->GtfsRTAlertInterpreter + ->GtfsRTAlertInterpreter ->AlertTableInterpreter ->AlertLoader; // 3. We define a series of HttpExtractors that each pull data // from an HTTP endpoint - block GTFSRTTripUpdateFeedExtractor oftype HttpExtractor { - url: "https://proxy.transport.data.gouv.fr/resource/bibus-brest-gtfs-rt-trip-update"; - } + block GTFSRTTripUpdateFeedExtractor oftype HttpExtractor { + url: "https://proxy.transport.data.gouv.fr/resource/bibus-brest-gtfs-rt-trip-update"; + } block GTFSRTVehiclePositionFeedExtractor oftype HttpExtractor { - url: "https://proxy.transport.data.gouv.fr/resource/bibus-brest-gtfs-rt-vehicle-position"; - } + url: "https://proxy.transport.data.gouv.fr/resource/bibus-brest-gtfs-rt-vehicle-position"; + } block GTFSRTAlertFeedExtractor oftype HttpExtractor { - url: "https://proxy.transport.data.gouv.fr/resource/bibus-brest-gtfs-rt-alerts"; - } + url: "https://proxy.transport.data.gouv.fr/resource/bibus-brest-gtfs-rt-alerts"; + } // 4. In the next step, we use the domain-specific GtfsRTInterpreter // to interpret the fetched files as sheets @@ -59,69 +59,69 @@ pipeline GtfsRTSimplePipeline { // 5. Next, we interpret the sheets as tables block TripUpdateTableInterpreter oftype TableInterpreter { - header: true; - columns:[ - "header.gtfs_realtime_version" oftype text, - "header.timestamp" oftype text, - "header.incrementality" oftype text, - "entity.id" oftype text, - "entity.trip_update.trip.trip_id" oftype text, + header: true; + columns: [ + "header.gtfs_realtime_version" oftype text, + "header.timestamp" oftype text, + "header.incrementality" oftype text, + "entity.id" oftype text, + "entity.trip_update.trip.trip_id" oftype text, "entity.trip_update.trip.route_id" oftype text, - "entity.trip_update.stop_time_update.stop_sequence" oftype text, + "entity.trip_update.stop_time_update.stop_sequence" oftype text, "entity.trip_update.stop_time_update.stop_id" oftype text, - "entity.trip_update.stop_time_update.arrival.time" oftype text, + "entity.trip_update.stop_time_update.arrival.time" oftype text, "entity.trip_update.stop_time_update.departure.time" oftype text, - ]; - } + ]; + } block VehiclePositionTableInterpreter oftype TableInterpreter { - header: true; - columns:[ - "header.gtfs_realtime_version" oftype text, - "header.timestamp" oftype text, - "header.incrementality" oftype text, - "entity.id" oftype text, - "entity.vehicle_position.vehicle_descriptor.id" oftype text, - "entity.vehicle_position.trip.trip_id" oftype text, - "entity.vehicle_position.trip.route_id" oftype text, - "entity.vehicle_position.position.latitude" oftype text, - "entity.vehicle_position.position.longitude" oftype text, + header: true; + columns: [ + "header.gtfs_realtime_version" oftype text, + "header.timestamp" oftype text, + "header.incrementality" oftype text, + "entity.id" oftype text, + "entity.vehicle_position.vehicle_descriptor.id" oftype text, + "entity.vehicle_position.trip.trip_id" oftype text, + "entity.vehicle_position.trip.route_id" oftype text, + "entity.vehicle_position.position.latitude" oftype text, + "entity.vehicle_position.position.longitude" oftype text, "entity.vehicle_position.timestamp" oftype text ]; - } + } block AlertTableInterpreter oftype TableInterpreter { - header: true; - columns:[ + header: true; + columns: [ 'header.gtfs_realtime_version' oftype text, 'header.timestamp' oftype text, - 'header.incrementality' oftype text, + 'header.incrementality' oftype text, 'entity.id' oftype text, 'entity.alert.informed_entity.route_id' oftype text, 'entity.alert.header_text' oftype text, 'entity.alert.description_text' oftype text, ]; - } + } // 6. Last, we load the tables into the same SQLite file. // Each loader has to define a different table name. // For working with live data, we use the property "dropTable: false" // to append data instead of deleting the previous data. block TripUpdateLoader oftype SQLiteLoader { - table: "gtfs-rt-trip_update"; - file: "./gtfs.sqlite"; + table: "gtfs-rt-trip_update"; + file: "./gtfs.sqlite"; dropTable: false; - } + } block VehicleLoader oftype SQLiteLoader { - table: "gtfs-rt-vehicle_position"; - file: "./gtfs.sqlite"; + table: "gtfs-rt-vehicle_position"; + file: "./gtfs.sqlite"; dropTable: false; - } + } block AlertLoader oftype SQLiteLoader { - table: "gtfs-rt-alert"; - file: "./gtfs.sqlite"; + table: "gtfs-rt-alert"; + file: "./gtfs.sqlite"; dropTable: false; - } + } } \ No newline at end of file diff --git a/example/gtfs-static.jv b/example/gtfs-static.jv index 9e36e3c4..c8bef46e 100644 --- a/example/gtfs-static.jv +++ b/example/gtfs-static.jv @@ -18,44 +18,44 @@ pipeline GtfsPipeline { -> AgencyInterpreter -> AgencyLoader; - GTFSSampleFeedExtractor + GTFSSampleFeedExtractor -> CalendarDatesInterpreter -> CalendarDatesLoader; - GTFSSampleFeedExtractor + GTFSSampleFeedExtractor -> CalendarInterpreter -> CalendarLoader; - GTFSSampleFeedExtractor + GTFSSampleFeedExtractor -> FareAttributesInterpreter -> FareAttributesLoader; - GTFSSampleFeedExtractor + GTFSSampleFeedExtractor -> FareRulesInterpreter -> FareRulesLoader; - GTFSSampleFeedExtractor + GTFSSampleFeedExtractor -> FrequenciesInterpreter -> FrequenciesLoader; - GTFSSampleFeedExtractor + GTFSSampleFeedExtractor -> RoutesInterpreter -> RoutesLoader; - GTFSSampleFeedExtractor + GTFSSampleFeedExtractor -> ShapesInterpreter -> ShapesLoader; - GTFSSampleFeedExtractor + GTFSSampleFeedExtractor -> StopTimesInterpreter -> StopTimesLoader; GTFSSampleFeedExtractor - -> StopsInterpreter + -> StopsInterpreter -> StopsLoader; - GTFSSampleFeedExtractor - -> TripsInterpreter + GTFSSampleFeedExtractor + -> TripsInterpreter -> TripsLoader; // 3. As a first step, we download the zip file and interpret it. @@ -78,15 +78,48 @@ pipeline GtfsPipeline { block TripsInterpreter oftype GTFSTripsInterpreter { } // 5. Finally, write the interpreted tables into a SQLite database - block AgencyLoader oftype SQLiteLoader { table: "agency"; file: "./gtfs.sqlite"; } - block CalendarDatesLoader oftype SQLiteLoader { table: "calendar_dates"; file: "./gtfs.sqlite"; } - block CalendarLoader oftype SQLiteLoader { table: "calendar"; file: "./gtfs.sqlite"; } - block FareAttributesLoader oftype SQLiteLoader { table: "fare_attributes"; file: "./gtfs.sqlite"; } - block FareRulesLoader oftype SQLiteLoader { table: "fare_rules"; file: "./gtfs.sqlite"; } - block FrequenciesLoader oftype SQLiteLoader { table: "frequencies"; file: "./gtfs.sqlite"; } - block RoutesLoader oftype SQLiteLoader { table: "routes"; file: "./gtfs.sqlite"; } - block ShapesLoader oftype SQLiteLoader { table: "shapes"; file: "./gtfs.sqlite"; } - block StopTimesLoader oftype SQLiteLoader { table: "stop_times"; file: "./gtfs.sqlite"; } - block StopsLoader oftype SQLiteLoader { table: "stops"; file: "./gtfs.sqlite"; } - block TripsLoader oftype SQLiteLoader { table: "trips"; file: "./gtfs.sqlite"; } + block AgencyLoader oftype SQLiteLoader { + table: "agency"; + file: "./gtfs.sqlite"; + } + block CalendarDatesLoader oftype SQLiteLoader { + table: "calendar_dates"; + file: "./gtfs.sqlite"; + } + block CalendarLoader oftype SQLiteLoader { + table: "calendar"; + file: "./gtfs.sqlite"; + } + block FareAttributesLoader oftype SQLiteLoader { + table: "fare_attributes"; + file: "./gtfs.sqlite"; + } + block FareRulesLoader oftype SQLiteLoader { + table: "fare_rules"; + file: "./gtfs.sqlite"; + } + block FrequenciesLoader oftype SQLiteLoader { + table: "frequencies"; + file: "./gtfs.sqlite"; + } + block RoutesLoader oftype SQLiteLoader { + table: "routes"; + file: "./gtfs.sqlite"; + } + block ShapesLoader oftype SQLiteLoader { + table: "shapes"; + file: "./gtfs.sqlite"; + } + block StopTimesLoader oftype SQLiteLoader { + table: "stop_times"; + file: "./gtfs.sqlite"; + } + block StopsLoader oftype SQLiteLoader { + table: "stops"; + file: "./gtfs.sqlite"; + } + block TripsLoader oftype SQLiteLoader { + table: "trips"; + file: "./gtfs.sqlite"; + } } \ No newline at end of file diff --git a/example/workbooks-xlsx.jv b/example/workbooks-xlsx.jv index 7f861fa6..65b0d349 100644 --- a/example/workbooks-xlsx.jv +++ b/example/workbooks-xlsx.jv @@ -21,9 +21,7 @@ pipeline LightTrappingSiliconSolarCellsPipeline { // 3. The incoming file is interpreted as a XLSX file and transformed into a Workbook // Workbooks contain at least 1 Sheet. Every sheet has a unique name. - block LightTrappingSiliconSolarCellsTextXLSXInterpreter oftype XLSXInterpreter { - - } + block LightTrappingSiliconSolarCellsTextXLSXInterpreter oftype XLSXInterpreter { } // 4.1 Here, we pick one sheet with the name 'RefractiveIndexSi GaAs' from the Workbook to use within our pipeline. // The output type from SheetPicker is Sheet, which was already introduced in the cars example @@ -32,8 +30,16 @@ pipeline LightTrappingSiliconSolarCellsPipeline { } block NameHeaderWriter oftype CellWriter { - at: range F1:L1; - write: ["F","G","nm","wl","n2", "k2", "alpha (cm-1)2"]; + at: range F1: L1; + write: [ + "F", + "G", + "nm", + "wl", + "n2", + "k2", + "alpha (cm-1)2" + ]; } block LightTrappingSiliconSolarCellsTableInterpreter oftype TableInterpreter { diff --git a/libs/language-server/src/lib/jayvee-module.ts b/libs/language-server/src/lib/jayvee-module.ts index a9150dff..5fefbfc8 100644 --- a/libs/language-server/src/lib/jayvee-module.ts +++ b/libs/language-server/src/lib/jayvee-module.ts @@ -28,6 +28,7 @@ import { JayveeWorkspaceManager } from './builtin-library/jayvee-workspace-manag import { JayveeCompletionProvider } from './completion/jayvee-completion-provider'; import { JayveeHoverProvider } from './hover/jayvee-hover-provider'; import { JayveeValueConverter } from './jayvee-value-converter'; +import { JayveeFormatter } from './services'; import { RuntimeParameterProvider } from './services/runtime-parameter-provider'; import { JayveeValidationRegistry } from './validation/validation-registry'; @@ -76,6 +77,7 @@ export const JayveeModule: Module< new JayveeCompletionProvider(services), HoverProvider: (services: JayveeServices) => new JayveeHoverProvider(services), + Formatter: () => new JayveeFormatter(), }, RuntimeParameterProvider: () => new RuntimeParameterProvider(), operators: { diff --git a/libs/language-server/src/lib/services/index.ts b/libs/language-server/src/lib/services/index.ts index d49eebc1..c002fac6 100644 --- a/libs/language-server/src/lib/services/index.ts +++ b/libs/language-server/src/lib/services/index.ts @@ -3,3 +3,4 @@ // SPDX-License-Identifier: AGPL-3.0-only export * from './runtime-parameter-provider'; +export * from './jayvee-formatter'; diff --git a/libs/language-server/src/lib/services/jayvee-formatter.ts b/libs/language-server/src/lib/services/jayvee-formatter.ts new file mode 100644 index 00000000..c2c6a4ce --- /dev/null +++ b/libs/language-server/src/lib/services/jayvee-formatter.ts @@ -0,0 +1,64 @@ +import { + type AstNode, + type LangiumDocument, + type MaybePromise, + isCompositeCstNode, +} from 'langium'; +import { AbstractFormatter, Formatting } from 'langium/lsp'; +import { + type DocumentFormattingParams, + type TextEdit, +} from 'vscode-languageserver-protocol'; + +import { isPipeDefinition } from '../ast/generated/ast'; + +export class JayveeFormatter extends AbstractFormatter { + protected override format(node: AstNode) { + const formatter = this.getNodeFormatter(node); + this.formatParenthesis(node, '{', '}'); + this.formatParenthesis(node, '[', ']'); + + formatter.keywords(',', ':', ';').prepend(Formatting.noSpace()); + formatter.keywords(':').append(Formatting.oneSpace()); + formatter.keywords('block').append(Formatting.oneSpace()); + formatter.keywords('oftype', 'cell').surround(Formatting.oneSpace()); + + if (isPipeDefinition(node)) { + formatter.keywords('->').prepend(Formatting.indent()); + } + } + + private formatParenthesis(node: AstNode, start: string, end: string) { + const formatter = this.getNodeFormatter(node); + if (!isCompositeCstNode(node.$cstNode)) { + return; + } + + const openingBraces = formatter.keywords(start); + const closingBraces = formatter.keyword(end); + const interior = formatter.interior(openingBraces, closingBraces); + if (interior.nodes.length > 0) { + interior.prepend(Formatting.indent({ allowMore: true })); + openingBraces + .prepend(Formatting.noIndent()) + .prepend(Formatting.oneSpace()); + closingBraces + .prepend(Formatting.noIndent()) + .prepend(Formatting.newLine()); + } else { + openingBraces + .prepend(Formatting.noIndent()) + .prepend(Formatting.oneSpace()); + closingBraces + .prepend(Formatting.noIndent()) + .prepend(Formatting.oneSpace()); + } + } + + override formatDocument( + document: LangiumDocument, + params: DocumentFormattingParams, + ): MaybePromise { + return super.formatDocument(document, params); + } +} From e668773d9a4bbe216aef5dcad3b511119cc58a5a Mon Sep 17 00:00:00 2001 From: Georg Schwarz Date: Mon, 13 May 2024 10:58:16 +0200 Subject: [PATCH 2/9] Add remaining keywords to Jayvee formatter --- example/electric-vehicles.jv | 6 ++-- example/gtfs-rt.jv | 6 ++-- .../src/lib/services/jayvee-formatter.ts | 34 +++++++++++++++++-- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/example/electric-vehicles.jv b/example/electric-vehicles.jv index b57b4ea0..115bca06 100644 --- a/example/electric-vehicles.jv +++ b/example/electric-vehicles.jv @@ -114,8 +114,7 @@ pipeline ElectricVehiclesPipeline { // that this value type builts on. User-defined value types always place additional constraints on existing value types. valuetype VehicleIdentificationNumber10 oftype text { // 10. Value types can be further refined by providing constraints. - constraints: - [ + constraints : [ OnlyCapitalLettersAndDigits, ExactlyTenCharacters, ]; @@ -128,8 +127,7 @@ constraint OnlyCapitalLettersAndDigits on text: value matches /^[A-Z0-9]*$/; constraint ExactlyTenCharacters on text: value.length == 10; valuetype UsStateCode oftype text { - constraints: - [ + constraints : [ UsStateCodeAllowlist, ]; } diff --git a/example/gtfs-rt.jv b/example/gtfs-rt.jv index 76619423..14d0590e 100644 --- a/example/gtfs-rt.jv +++ b/example/gtfs-rt.jv @@ -60,7 +60,7 @@ pipeline GtfsRTSimplePipeline { // 5. Next, we interpret the sheets as tables block TripUpdateTableInterpreter oftype TableInterpreter { header: true; - columns: [ + columns: [ "header.gtfs_realtime_version" oftype text, "header.timestamp" oftype text, "header.incrementality" oftype text, @@ -76,7 +76,7 @@ pipeline GtfsRTSimplePipeline { block VehiclePositionTableInterpreter oftype TableInterpreter { header: true; - columns: [ + columns: [ "header.gtfs_realtime_version" oftype text, "header.timestamp" oftype text, "header.incrementality" oftype text, @@ -92,7 +92,7 @@ pipeline GtfsRTSimplePipeline { block AlertTableInterpreter oftype TableInterpreter { header: true; - columns: [ + columns: [ 'header.gtfs_realtime_version' oftype text, 'header.timestamp' oftype text, 'header.incrementality' oftype text, diff --git a/libs/language-server/src/lib/services/jayvee-formatter.ts b/libs/language-server/src/lib/services/jayvee-formatter.ts index c2c6a4ce..00f8cd2b 100644 --- a/libs/language-server/src/lib/services/jayvee-formatter.ts +++ b/libs/language-server/src/lib/services/jayvee-formatter.ts @@ -10,7 +10,7 @@ import { type TextEdit, } from 'vscode-languageserver-protocol'; -import { isPipeDefinition } from '../ast/generated/ast'; +import { isBlockTypePipeline, isPipeDefinition } from '../ast/generated/ast'; export class JayveeFormatter extends AbstractFormatter { protected override format(node: AstNode) { @@ -20,10 +20,38 @@ export class JayveeFormatter extends AbstractFormatter { formatter.keywords(',', ':', ';').prepend(Formatting.noSpace()); formatter.keywords(':').append(Formatting.oneSpace()); + formatter + .keywords('builtin', 'property', 'requires') + .append(Formatting.oneSpace()); + + formatter + .keywords('blocktype', 'composite', 'input', 'output') + .append(Formatting.oneSpace()); + formatter.keywords('block').append(Formatting.oneSpace()); - formatter.keywords('oftype', 'cell').surround(Formatting.oneSpace()); + formatter + .keywords('constraint', 'constrainttype') + .append(Formatting.oneSpace()); + formatter.keywords('oftype').surround(Formatting.oneSpace()); + formatter.keywords('on').surround(Formatting.oneSpace()); + + formatter.keywords('iotype').append(Formatting.oneSpace()); + formatter + .keywords('valuetype', 'constraints') + .append(Formatting.oneSpace()); + formatter + .keywords('<', '>') + .append(Formatting.noSpace()) + .prepend(Formatting.noSpace()); + + formatter.keywords('transform', 'from', 'to').append(Formatting.oneSpace()); + + formatter + .keywords('cell', 'column', 'row', 'range') + .surround(Formatting.oneSpace()); - if (isPipeDefinition(node)) { + formatter.keywords('pipeline').append(Formatting.oneSpace()); + if (isPipeDefinition(node) || isBlockTypePipeline(node)) { formatter.keywords('->').prepend(Formatting.indent()); } } From 83b2e3b3233c8194f36d0ed651f62cd91c900efb Mon Sep 17 00:00:00 2001 From: Georg Schwarz Date: Mon, 13 May 2024 11:10:00 +0200 Subject: [PATCH 3/9] Refactor parenthesis formatting --- .../src/lib/services/jayvee-formatter.ts | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/libs/language-server/src/lib/services/jayvee-formatter.ts b/libs/language-server/src/lib/services/jayvee-formatter.ts index 00f8cd2b..cbc892fe 100644 --- a/libs/language-server/src/lib/services/jayvee-formatter.ts +++ b/libs/language-server/src/lib/services/jayvee-formatter.ts @@ -65,22 +65,19 @@ export class JayveeFormatter extends AbstractFormatter { const openingBraces = formatter.keywords(start); const closingBraces = formatter.keyword(end); const interior = formatter.interior(openingBraces, closingBraces); - if (interior.nodes.length > 0) { - interior.prepend(Formatting.indent({ allowMore: true })); - openingBraces - .prepend(Formatting.noIndent()) - .prepend(Formatting.oneSpace()); - closingBraces - .prepend(Formatting.noIndent()) - .prepend(Formatting.newLine()); - } else { + if (interior.nodes.length === 0) { openingBraces .prepend(Formatting.noIndent()) .prepend(Formatting.oneSpace()); closingBraces .prepend(Formatting.noIndent()) .prepend(Formatting.oneSpace()); + return; } + + interior.prepend(Formatting.indent({ allowMore: true })); + openingBraces.prepend(Formatting.noIndent()).prepend(Formatting.oneSpace()); + closingBraces.prepend(Formatting.noIndent()).prepend(Formatting.newLine()); } override formatDocument( From 2050452473741906fc9e8eec3f09691bd97aaa22 Mon Sep 17 00:00:00 2001 From: Georg Schwarz Date: Mon, 13 May 2024 17:19:31 +0200 Subject: [PATCH 4/9] Respect comments indentation in formatter --- .../src/lib/services/jayvee-formatter.ts | 128 ++++++++++++++++-- 1 file changed, 113 insertions(+), 15 deletions(-) diff --git a/libs/language-server/src/lib/services/jayvee-formatter.ts b/libs/language-server/src/lib/services/jayvee-formatter.ts index cbc892fe..9adcfdc5 100644 --- a/libs/language-server/src/lib/services/jayvee-formatter.ts +++ b/libs/language-server/src/lib/services/jayvee-formatter.ts @@ -1,14 +1,11 @@ +import { type AstNode, type CstNode, isCompositeCstNode } from 'langium'; import { - type AstNode, - type LangiumDocument, - type MaybePromise, - isCompositeCstNode, -} from 'langium'; -import { AbstractFormatter, Formatting } from 'langium/lsp'; -import { - type DocumentFormattingParams, - type TextEdit, -} from 'vscode-languageserver-protocol'; + AbstractFormatter, + Formatting, + type FormattingAction, + type FormattingContext, +} from 'langium/lsp'; +import { type Range, type TextEdit } from 'vscode-languageserver-protocol'; import { isBlockTypePipeline, isPipeDefinition } from '../ast/generated/ast'; @@ -80,10 +77,111 @@ export class JayveeFormatter extends AbstractFormatter { closingBraces.prepend(Formatting.noIndent()).prepend(Formatting.newLine()); } - override formatDocument( - document: LangiumDocument, - params: DocumentFormattingParams, - ): MaybePromise { - return super.formatDocument(document, params); + /** + * https://github.com/eclipse-langium/langium/issues/1351 + */ + protected override createHiddenTextEdits( + previous: CstNode | undefined, + hidden: CstNode, + formatting: FormattingAction | undefined, + context: FormattingContext, + ): TextEdit[] { + const edits: TextEdit[] = []; + + // Don't format the hidden node if it is on the same line as its previous node + const startLine = hidden.range.start.line; + if (previous && previous.range.end.line === startLine) { + return []; + } + + const startRange: Range = { + start: { + character: 0, + line: startLine, + }, + end: hidden.range.start, + }; + const hiddenStartText = context.document.getText(startRange); + const move = this.findFittingMove( + startRange, + formatting?.moves ?? [], + context, + ); + + const hiddenStartChar = this.getExistingIndentationCharacterCount( + hiddenStartText, + context, + ); + const expectedStartChar = this.getIndentationCharacterCount(context, move); + + const newStartText = (context.options.insertSpaces ? ' ' : '\t').repeat( + expectedStartChar, + ); + + if (newStartText === hiddenStartText) { + return []; + } + + const lines = hidden.text.split('\n'); + lines[0] = hiddenStartText + lines[0]; + for (let i = 0; i < lines.length; i++) { + const currentLine = startLine + i; + + edits.push({ + newText: newStartText, + range: { + start: { + line: currentLine, + character: 0, + }, + end: { + line: currentLine, + character: hiddenStartChar, + }, + }, + }); + } + + return edits; + } + + /** + * Creates edits to replace leading tabs and spaces according to config. + */ + protected createIndentHiddenTextEdits( + hidden: CstNode, + context: FormattingContext, + ): TextEdit[] { + const startLine = hidden.range.start.line; + const startRange: Range = { + start: { + character: 0, + line: startLine, + }, + end: hidden.range.start, + }; + const hiddenStartText = context.document.getText(startRange); + + if (context.options.insertSpaces) { + if (!hiddenStartText.includes('\t')) { + return []; + } + return [ + { + newText: hiddenStartText.replace('\t', ' '), + range: startRange, + }, + ]; + } + + if (!hiddenStartText.includes(' ')) { + return []; + } + return [ + { + newText: hiddenStartText.replace(' ', '\t'), + range: startRange, + }, + ]; } } From 6e764683673bbe1a91ef461b92d7d9df39153ca3 Mon Sep 17 00:00:00 2001 From: Georg Schwarz Date: Tue, 14 May 2024 08:43:39 +0200 Subject: [PATCH 5/9] Move all lsp services to own lsp directory --- libs/language-server/src/lib/index.ts | 1 + libs/language-server/src/lib/jayvee-module.ts | 8 +++++--- libs/language-server/src/lib/lsp/index.ts | 7 +++++++ .../lib/{completion => lsp}/jayvee-completion-provider.ts | 0 .../src/lib/{services => lsp}/jayvee-formatter.ts | 4 ++++ .../src/lib/{hover => lsp}/jayvee-hover-provider.ts | 0 libs/language-server/src/lib/services/index.ts | 1 - 7 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 libs/language-server/src/lib/lsp/index.ts rename libs/language-server/src/lib/{completion => lsp}/jayvee-completion-provider.ts (100%) rename libs/language-server/src/lib/{services => lsp}/jayvee-formatter.ts (97%) rename libs/language-server/src/lib/{hover => lsp}/jayvee-hover-provider.ts (100%) diff --git a/libs/language-server/src/lib/index.ts b/libs/language-server/src/lib/index.ts index 3f68e73b..a6ef2067 100644 --- a/libs/language-server/src/lib/index.ts +++ b/libs/language-server/src/lib/index.ts @@ -8,5 +8,6 @@ export * from './docs'; export * from './services'; export * from './util'; export * from './validation'; +export * from './lsp'; export * from './jayvee-module'; diff --git a/libs/language-server/src/lib/jayvee-module.ts b/libs/language-server/src/lib/jayvee-module.ts index 5fefbfc8..b2f08204 100644 --- a/libs/language-server/src/lib/jayvee-module.ts +++ b/libs/language-server/src/lib/jayvee-module.ts @@ -25,10 +25,12 @@ import { import { ValueTypeProvider } from './ast/wrappers/value-type/primitive/primitive-value-type-provider'; import { WrapperFactoryProvider } from './ast/wrappers/wrapper-factory-provider'; import { JayveeWorkspaceManager } from './builtin-library/jayvee-workspace-manager'; -import { JayveeCompletionProvider } from './completion/jayvee-completion-provider'; -import { JayveeHoverProvider } from './hover/jayvee-hover-provider'; import { JayveeValueConverter } from './jayvee-value-converter'; -import { JayveeFormatter } from './services'; +import { + JayveeCompletionProvider, + JayveeFormatter, + JayveeHoverProvider, +} from './lsp'; import { RuntimeParameterProvider } from './services/runtime-parameter-provider'; import { JayveeValidationRegistry } from './validation/validation-registry'; diff --git a/libs/language-server/src/lib/lsp/index.ts b/libs/language-server/src/lib/lsp/index.ts new file mode 100644 index 00000000..9d903814 --- /dev/null +++ b/libs/language-server/src/lib/lsp/index.ts @@ -0,0 +1,7 @@ +// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg +// +// SPDX-License-Identifier: AGPL-3.0-only + +export * from './jayvee-completion-provider'; +export * from './jayvee-formatter'; +export * from './jayvee-hover-provider'; diff --git a/libs/language-server/src/lib/completion/jayvee-completion-provider.ts b/libs/language-server/src/lib/lsp/jayvee-completion-provider.ts similarity index 100% rename from libs/language-server/src/lib/completion/jayvee-completion-provider.ts rename to libs/language-server/src/lib/lsp/jayvee-completion-provider.ts diff --git a/libs/language-server/src/lib/services/jayvee-formatter.ts b/libs/language-server/src/lib/lsp/jayvee-formatter.ts similarity index 97% rename from libs/language-server/src/lib/services/jayvee-formatter.ts rename to libs/language-server/src/lib/lsp/jayvee-formatter.ts index 9adcfdc5..1b936ebe 100644 --- a/libs/language-server/src/lib/services/jayvee-formatter.ts +++ b/libs/language-server/src/lib/lsp/jayvee-formatter.ts @@ -1,3 +1,7 @@ +// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg +// +// SPDX-License-Identifier: AGPL-3.0-only + import { type AstNode, type CstNode, isCompositeCstNode } from 'langium'; import { AbstractFormatter, diff --git a/libs/language-server/src/lib/hover/jayvee-hover-provider.ts b/libs/language-server/src/lib/lsp/jayvee-hover-provider.ts similarity index 100% rename from libs/language-server/src/lib/hover/jayvee-hover-provider.ts rename to libs/language-server/src/lib/lsp/jayvee-hover-provider.ts diff --git a/libs/language-server/src/lib/services/index.ts b/libs/language-server/src/lib/services/index.ts index c002fac6..d49eebc1 100644 --- a/libs/language-server/src/lib/services/index.ts +++ b/libs/language-server/src/lib/services/index.ts @@ -3,4 +3,3 @@ // SPDX-License-Identifier: AGPL-3.0-only export * from './runtime-parameter-provider'; -export * from './jayvee-formatter'; From e414815d81100eb09f4cfb5a4ce4fb2d70632c9c Mon Sep 17 00:00:00 2001 From: Georg Schwarz Date: Tue, 14 May 2024 09:12:05 +0200 Subject: [PATCH 6/9] Document hidden text formatting --- .../src/lib/lsp/jayvee-formatter.ts | 51 +++---------------- 1 file changed, 8 insertions(+), 43 deletions(-) diff --git a/libs/language-server/src/lib/lsp/jayvee-formatter.ts b/libs/language-server/src/lib/lsp/jayvee-formatter.ts index 1b936ebe..5c650893 100644 --- a/libs/language-server/src/lib/lsp/jayvee-formatter.ts +++ b/libs/language-server/src/lib/lsp/jayvee-formatter.ts @@ -82,6 +82,7 @@ export class JayveeFormatter extends AbstractFormatter { } /** + * Overwrite to work around this issue: * https://github.com/eclipse-langium/langium/issues/1351 */ protected override createHiddenTextEdits( @@ -92,10 +93,10 @@ export class JayveeFormatter extends AbstractFormatter { ): TextEdit[] { const edits: TextEdit[] = []; - // Don't format the hidden node if it is on the same line as its previous node + // We only format hidden nodes that are on their own line. const startLine = hidden.range.start.line; if (previous && previous.range.end.line === startLine) { - return []; + return edits; } const startRange: Range = { @@ -122,8 +123,11 @@ export class JayveeFormatter extends AbstractFormatter { expectedStartChar, ); + // Compare exact texts instead of char numbers + // to make sure the indent config (tabs vs. spaces) is respected. if (newStartText === hiddenStartText) { - return []; + // Don't add unnecessary edits if there is nothing to do. + return edits; } const lines = hidden.text.split('\n'); @@ -131,6 +135,7 @@ export class JayveeFormatter extends AbstractFormatter { for (let i = 0; i < lines.length; i++) { const currentLine = startLine + i; + // Replace the full start text, so tabs and spaces work in any case. edits.push({ newText: newStartText, range: { @@ -148,44 +153,4 @@ export class JayveeFormatter extends AbstractFormatter { return edits; } - - /** - * Creates edits to replace leading tabs and spaces according to config. - */ - protected createIndentHiddenTextEdits( - hidden: CstNode, - context: FormattingContext, - ): TextEdit[] { - const startLine = hidden.range.start.line; - const startRange: Range = { - start: { - character: 0, - line: startLine, - }, - end: hidden.range.start, - }; - const hiddenStartText = context.document.getText(startRange); - - if (context.options.insertSpaces) { - if (!hiddenStartText.includes('\t')) { - return []; - } - return [ - { - newText: hiddenStartText.replace('\t', ' '), - range: startRange, - }, - ]; - } - - if (!hiddenStartText.includes(' ')) { - return []; - } - return [ - { - newText: hiddenStartText.replace(' ', '\t'), - range: startRange, - }, - ]; - } } From 951ea3e8e932af52bd76596f3d4dd29e9cab7daa Mon Sep 17 00:00:00 2001 From: Georg Schwarz Date: Thu, 16 May 2024 17:55:05 +0200 Subject: [PATCH 7/9] Format CellRangeLiterals --- example/workbooks-xlsx.jv | 2 +- libs/language-server/src/lib/lsp/jayvee-formatter.ts | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/example/workbooks-xlsx.jv b/example/workbooks-xlsx.jv index 65b0d349..7e7cbf44 100644 --- a/example/workbooks-xlsx.jv +++ b/example/workbooks-xlsx.jv @@ -30,7 +30,7 @@ pipeline LightTrappingSiliconSolarCellsPipeline { } block NameHeaderWriter oftype CellWriter { - at: range F1: L1; + at: range F1:L1; write: [ "F", "G", diff --git a/libs/language-server/src/lib/lsp/jayvee-formatter.ts b/libs/language-server/src/lib/lsp/jayvee-formatter.ts index 5c650893..b9da1ef5 100644 --- a/libs/language-server/src/lib/lsp/jayvee-formatter.ts +++ b/libs/language-server/src/lib/lsp/jayvee-formatter.ts @@ -11,7 +11,11 @@ import { } from 'langium/lsp'; import { type Range, type TextEdit } from 'vscode-languageserver-protocol'; -import { isBlockTypePipeline, isPipeDefinition } from '../ast/generated/ast'; +import { + isBlockTypePipeline, + isCellRangeLiteral, + isPipeDefinition, +} from '../ast/generated/ast'; export class JayveeFormatter extends AbstractFormatter { protected override format(node: AstNode) { @@ -55,6 +59,10 @@ export class JayveeFormatter extends AbstractFormatter { if (isPipeDefinition(node) || isBlockTypePipeline(node)) { formatter.keywords('->').prepend(Formatting.indent()); } + + if (isCellRangeLiteral(node)) { + formatter.keywords(':').append(Formatting.noSpace({ priority: 1 })); + } } private formatParenthesis(node: AstNode, start: string, end: string) { From b4f6cd14472b89baa28cf53923fbce84af0a1048 Mon Sep 17 00:00:00 2001 From: Georg Schwarz Date: Thu, 16 May 2024 18:04:00 +0200 Subject: [PATCH 8/9] No space after 'constraints' keyword --- example/electric-vehicles.jv | 4 ++-- libs/language-server/src/lib/lsp/jayvee-formatter.ts | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/example/electric-vehicles.jv b/example/electric-vehicles.jv index 115bca06..b71ef252 100644 --- a/example/electric-vehicles.jv +++ b/example/electric-vehicles.jv @@ -114,7 +114,7 @@ pipeline ElectricVehiclesPipeline { // that this value type builts on. User-defined value types always place additional constraints on existing value types. valuetype VehicleIdentificationNumber10 oftype text { // 10. Value types can be further refined by providing constraints. - constraints : [ + constraints: [ OnlyCapitalLettersAndDigits, ExactlyTenCharacters, ]; @@ -127,7 +127,7 @@ constraint OnlyCapitalLettersAndDigits on text: value matches /^[A-Z0-9]*$/; constraint ExactlyTenCharacters on text: value.length == 10; valuetype UsStateCode oftype text { - constraints : [ + constraints: [ UsStateCodeAllowlist, ]; } diff --git a/libs/language-server/src/lib/lsp/jayvee-formatter.ts b/libs/language-server/src/lib/lsp/jayvee-formatter.ts index b9da1ef5..3d7c5ee3 100644 --- a/libs/language-server/src/lib/lsp/jayvee-formatter.ts +++ b/libs/language-server/src/lib/lsp/jayvee-formatter.ts @@ -41,9 +41,8 @@ export class JayveeFormatter extends AbstractFormatter { formatter.keywords('on').surround(Formatting.oneSpace()); formatter.keywords('iotype').append(Formatting.oneSpace()); - formatter - .keywords('valuetype', 'constraints') - .append(Formatting.oneSpace()); + formatter.keywords('valuetype').append(Formatting.oneSpace()); + formatter.keywords('constraints').append(Formatting.noSpace()); formatter .keywords('<', '>') .append(Formatting.noSpace()) From b29c8e742ecf0652f1ff013cecb566a75ef69b70 Mon Sep 17 00:00:00 2001 From: Georg Schwarz Date: Thu, 16 May 2024 18:10:03 +0200 Subject: [PATCH 9/9] Change indentation of jv examples to 2 spaces --- example/cars.jv | 184 ++++++++++---------- example/electric-vehicles.jv | 320 +++++++++++++++++------------------ example/gtfs-rt.jv | 224 ++++++++++++------------ example/gtfs-static.jv | 222 ++++++++++++------------ example/workbooks-xlsx.jv | 148 ++++++++-------- 5 files changed, 549 insertions(+), 549 deletions(-) diff --git a/example/cars.jv b/example/cars.jv index a5bed357..644cea63 100644 --- a/example/cars.jv +++ b/example/cars.jv @@ -12,96 +12,96 @@ // to a SQLite file sink. pipeline CarsPipeline { - // 2. We describe the structure of the pipeline, - // usually at the top of the pipeline. - // by connecting blocks via pipes. - - // 3. Syntax of a pipe - // connecting the block CarsExtractor - // with the block CarsTextFileInterpreter. - CarsExtractor - -> CarsTextFileInterpreter; - - // 4. The output of the preceding block is hereby used - // as input for the succeeding block. - - // 5. Pipes can be further chained, - // leading to an overview of the pipeline. - CarsTextFileInterpreter - -> CarsCSVInterpreter - -> NameHeaderWriter - -> CarsTableInterpreter - -> CarsLoader; - - - // 6. Below the pipes, we usually define the blocks - // that are connected by the pipes. - - // 7. Blocks instantiate a block type by using the oftype keyword. - // The block type defines the available properties that the block - // can use to specify the intended behavior of the block - block CarsExtractor oftype HttpExtractor { - - // 8. Properties are assigned to concrete values. - // Here, we specify the URL where the file shall be downloaded from. - url: "https://gist.githubusercontent.com/noamross/e5d3e859aa0c794be10b/raw/b999fb4425b54c63cab088c0ce2c0d6ce961a563/cars.csv"; - } - - // 9. The HttpExtractor requires no input and produces a binary file as output. - // This file has to be interpreted, e.g., as text file. - block CarsTextFileInterpreter oftype TextFileInterpreter { } - - // 10. Next, we interpret the text file as sheet. - // A sheet only contains text cells and is useful for manipulating the shape of data before assigning more strict value types to cells. - block CarsCSVInterpreter oftype CSVInterpreter { - enclosing: '"'; - } - - // 11. We can write into cells of a sheet using the CellWriter block type. - block NameHeaderWriter oftype CellWriter { - // 12. We utilize a syntax similar to spreadsheet programs. - // Cell ranges can be described using the keywords "cell", "row", "column", or "range" that indicate which - // cells are selected for the write action. - at: cell A1; - - // 13. For each cell we selected with the "at" property above, - // we can specify what value shall be written into the cell. - write: [ - "name" - ]; - } - - // 14. As a next step, we interpret the sheet as a table by adding structure. - // We define a value type per column that specifies the data type of the column. - // Rows that include values that are not valid according to the their value types are dropped automatically. - block CarsTableInterpreter oftype TableInterpreter { - header: true; - columns: [ - "name" oftype text, - "mpg" oftype decimal, - "cyl" oftype integer, - "disp" oftype decimal, - "hp" oftype integer, - "drat" oftype decimal, - "wt" oftype decimal, - "qsec" oftype decimal, - "vs" oftype integer, - "am" oftype integer, - "gear" oftype integer, - "carb" oftype integer - ]; - } - - // 15. As a last step, we load the table into a sink, - // here into a sqlite file. - // The structural information of the table is used - // to generate the correct table. - block CarsLoader oftype SQLiteLoader { - table: "Cars"; - file: "./cars.sqlite"; - } - - // 16. Congratulations! - // You can now use the sink for your data analysis, app, - // or whatever you want to do with the cleaned data. + // 2. We describe the structure of the pipeline, + // usually at the top of the pipeline. + // by connecting blocks via pipes. + + // 3. Syntax of a pipe + // connecting the block CarsExtractor + // with the block CarsTextFileInterpreter. + CarsExtractor + -> CarsTextFileInterpreter; + + // 4. The output of the preceding block is hereby used + // as input for the succeeding block. + + // 5. Pipes can be further chained, + // leading to an overview of the pipeline. + CarsTextFileInterpreter + -> CarsCSVInterpreter + -> NameHeaderWriter + -> CarsTableInterpreter + -> CarsLoader; + + + // 6. Below the pipes, we usually define the blocks + // that are connected by the pipes. + + // 7. Blocks instantiate a block type by using the oftype keyword. + // The block type defines the available properties that the block + // can use to specify the intended behavior of the block + block CarsExtractor oftype HttpExtractor { + + // 8. Properties are assigned to concrete values. + // Here, we specify the URL where the file shall be downloaded from. + url: "https://gist.githubusercontent.com/noamross/e5d3e859aa0c794be10b/raw/b999fb4425b54c63cab088c0ce2c0d6ce961a563/cars.csv"; + } + + // 9. The HttpExtractor requires no input and produces a binary file as output. + // This file has to be interpreted, e.g., as text file. + block CarsTextFileInterpreter oftype TextFileInterpreter { } + + // 10. Next, we interpret the text file as sheet. + // A sheet only contains text cells and is useful for manipulating the shape of data before assigning more strict value types to cells. + block CarsCSVInterpreter oftype CSVInterpreter { + enclosing: '"'; + } + + // 11. We can write into cells of a sheet using the CellWriter block type. + block NameHeaderWriter oftype CellWriter { + // 12. We utilize a syntax similar to spreadsheet programs. + // Cell ranges can be described using the keywords "cell", "row", "column", or "range" that indicate which + // cells are selected for the write action. + at: cell A1; + + // 13. For each cell we selected with the "at" property above, + // we can specify what value shall be written into the cell. + write: [ + "name" + ]; + } + + // 14. As a next step, we interpret the sheet as a table by adding structure. + // We define a value type per column that specifies the data type of the column. + // Rows that include values that are not valid according to the their value types are dropped automatically. + block CarsTableInterpreter oftype TableInterpreter { + header: true; + columns: [ + "name" oftype text, + "mpg" oftype decimal, + "cyl" oftype integer, + "disp" oftype decimal, + "hp" oftype integer, + "drat" oftype decimal, + "wt" oftype decimal, + "qsec" oftype decimal, + "vs" oftype integer, + "am" oftype integer, + "gear" oftype integer, + "carb" oftype integer + ]; + } + + // 15. As a last step, we load the table into a sink, + // here into a sqlite file. + // The structural information of the table is used + // to generate the correct table. + block CarsLoader oftype SQLiteLoader { + table: "Cars"; + file: "./cars.sqlite"; + } + + // 16. Congratulations! + // You can now use the sink for your data analysis, app, + // or whatever you want to do with the cleaned data. } \ No newline at end of file diff --git a/example/electric-vehicles.jv b/example/electric-vehicles.jv index b71ef252..6fae249d 100644 --- a/example/electric-vehicles.jv +++ b/example/electric-vehicles.jv @@ -12,112 +12,112 @@ // from a CSV file in the web // to a SQLite file and a PostgreSQL db sink. pipeline ElectricVehiclesPipeline { - // See here for meta-data of the data source - // https://catalog.data.gov/dataset/electric-vehicle-population-data/resource/fa51be35-691f-45d2-9f3e-535877965e69 - - // 2. At the top of a pipeline, we describe the - // structure of the pipeline. The first part until - // the ElectricRangeTransformer is a linear sequence - // of blocks. From there we can see a split into two - // parallel sequences that load the data in to two - // different sinks. - ElectricVehiclesHttpExtractor - -> ElectricVehiclesTextFileInterpreter - -> ElectricVehiclesCSVInterpreter - -> ElectricVehiclesTableInterpreter - -> ElectricRangeTransformer; - - ElectricRangeTransformer - -> ElectricVehiclesSQLiteLoader; - - ElectricRangeTransformer - -> ElectricVehiclesPostgresLoader; - - // 3. After the pipeline structure, we define the blocks used. - block ElectricVehiclesHttpExtractor oftype HttpExtractor { - url: "https://data.wa.gov/api/views/f6w7-q2d2/rows.csv?accessType=DOWNLOAD"; - } - - block ElectricVehiclesTextFileInterpreter oftype TextFileInterpreter { } - - block ElectricVehiclesCSVInterpreter oftype CSVInterpreter { } - - block ElectricVehiclesTableInterpreter oftype TableInterpreter { - header: true; - columns: [ - // 4. Here, a user-deifned value type is used to describe this column. - // The capital letter indicates that the value type is not built-in - // by convention. The value type itself is defined further below. - "VIN (1-10)" oftype VehicleIdentificationNumber10, - "County" oftype text, - "City" oftype text, - "State" oftype UsStateCode, - "Postal Code" oftype text, - "Model Year" oftype integer, - "Make" oftype text, - "Model" oftype text, - "Electric Vehicle Type" oftype text, - "Clean Alternative Fuel Vehicle (CAFV) Eligibility" oftype text, - "Electric Range" oftype integer, - "Base MSRP" oftype integer, - "Legislative District" oftype text, - "DOL Vehicle ID" oftype integer, - "Vehicle Location" oftype text, - "Electric Utility" oftype text, - "2020 Census Tract" oftype text, - ]; - } - - // 5. This block describes the application of a transform function - // taking a column as input and adding another computed column. - // The applied transform function is defined below and referenced - // by the "use" property. - block ElectricRangeTransformer oftype TableTransformer { - inputColumns: [ - "Electric Range" - ]; - outputColumn: "Electric Range (km)"; - use: MilesToKilometers; - } - - // 6. Here, we define a transform function, taking parameters - // as input ("from" keyword), and producing an output ("to" keyword). - // Inputs and outputs have to be further described by a value type. - transform MilesToKilometers { - from miles oftype decimal; - to kilometers oftype integer; - - // 7. In order to express what the transform function does, - // we assign an expression to the output. Values from the input and output of the transform can be referred to by name. - kilometers: round (miles * 1.609344); - } - - block ElectricVehiclesSQLiteLoader oftype SQLiteLoader { - table: "ElectricVehiclePopulationData"; - file: "./electric-vehicles.sqlite"; - } - - block ElectricVehiclesPostgresLoader oftype PostgresLoader { - // 8. The requires keyword allows us to define runtime parameters. - // These values have to be provided as environment variables when interpreting the Jayvee model. - host: requires DB_HOST; - port: requires DB_PORT; - username: requires DB_USERNAME; - password: requires DB_PASSWORD; - database: requires DB_DATABASE; - table: "ElectricVehiclePopulationData"; - } + // See here for meta-data of the data source + // https://catalog.data.gov/dataset/electric-vehicle-population-data/resource/fa51be35-691f-45d2-9f3e-535877965e69 + + // 2. At the top of a pipeline, we describe the + // structure of the pipeline. The first part until + // the ElectricRangeTransformer is a linear sequence + // of blocks. From there we can see a split into two + // parallel sequences that load the data in to two + // different sinks. + ElectricVehiclesHttpExtractor + -> ElectricVehiclesTextFileInterpreter + -> ElectricVehiclesCSVInterpreter + -> ElectricVehiclesTableInterpreter + -> ElectricRangeTransformer; + + ElectricRangeTransformer + -> ElectricVehiclesSQLiteLoader; + + ElectricRangeTransformer + -> ElectricVehiclesPostgresLoader; + + // 3. After the pipeline structure, we define the blocks used. + block ElectricVehiclesHttpExtractor oftype HttpExtractor { + url: "https://data.wa.gov/api/views/f6w7-q2d2/rows.csv?accessType=DOWNLOAD"; + } + + block ElectricVehiclesTextFileInterpreter oftype TextFileInterpreter { } + + block ElectricVehiclesCSVInterpreter oftype CSVInterpreter { } + + block ElectricVehiclesTableInterpreter oftype TableInterpreter { + header: true; + columns: [ + // 4. Here, a user-deifned value type is used to describe this column. + // The capital letter indicates that the value type is not built-in + // by convention. The value type itself is defined further below. + "VIN (1-10)" oftype VehicleIdentificationNumber10, + "County" oftype text, + "City" oftype text, + "State" oftype UsStateCode, + "Postal Code" oftype text, + "Model Year" oftype integer, + "Make" oftype text, + "Model" oftype text, + "Electric Vehicle Type" oftype text, + "Clean Alternative Fuel Vehicle (CAFV) Eligibility" oftype text, + "Electric Range" oftype integer, + "Base MSRP" oftype integer, + "Legislative District" oftype text, + "DOL Vehicle ID" oftype integer, + "Vehicle Location" oftype text, + "Electric Utility" oftype text, + "2020 Census Tract" oftype text, + ]; + } + + // 5. This block describes the application of a transform function + // taking a column as input and adding another computed column. + // The applied transform function is defined below and referenced + // by the "use" property. + block ElectricRangeTransformer oftype TableTransformer { + inputColumns: [ + "Electric Range" + ]; + outputColumn: "Electric Range (km)"; + use: MilesToKilometers; + } + + // 6. Here, we define a transform function, taking parameters + // as input ("from" keyword), and producing an output ("to" keyword). + // Inputs and outputs have to be further described by a value type. + transform MilesToKilometers { + from miles oftype decimal; + to kilometers oftype integer; + + // 7. In order to express what the transform function does, + // we assign an expression to the output. Values from the input and output of the transform can be referred to by name. + kilometers: round (miles * 1.609344); + } + + block ElectricVehiclesSQLiteLoader oftype SQLiteLoader { + table: "ElectricVehiclePopulationData"; + file: "./electric-vehicles.sqlite"; + } + + block ElectricVehiclesPostgresLoader oftype PostgresLoader { + // 8. The requires keyword allows us to define runtime parameters. + // These values have to be provided as environment variables when interpreting the Jayvee model. + host: requires DB_HOST; + port: requires DB_PORT; + username: requires DB_USERNAME; + password: requires DB_PASSWORD; + database: requires DB_DATABASE; + table: "ElectricVehiclePopulationData"; + } } // 9. Below the pipeline, we model user-define value types. // We give them a speaking name and provide a base value type // that this value type builts on. User-defined value types always place additional constraints on existing value types. valuetype VehicleIdentificationNumber10 oftype text { - // 10. Value types can be further refined by providing constraints. - constraints: [ - OnlyCapitalLettersAndDigits, - ExactlyTenCharacters, - ]; + // 10. Value types can be further refined by providing constraints. + constraints: [ + OnlyCapitalLettersAndDigits, + ExactlyTenCharacters, + ]; } // 11. This constraint works on text value types and requires values @@ -127,67 +127,67 @@ constraint OnlyCapitalLettersAndDigits on text: value matches /^[A-Z0-9]*$/; constraint ExactlyTenCharacters on text: value.length == 10; valuetype UsStateCode oftype text { - constraints: [ - UsStateCodeAllowlist, - ]; + constraints: [ + UsStateCodeAllowlist, + ]; } constraint UsStateCodeAllowlist on text: value in [ - "AL", - "AK", - "AZ", - "AR", - "AS", - "CA", - "CO", - "CT", - "DE", - "DC", - "FL", - "GA", - "GU", - "HI", - "ID", - "IL", - "IN", - "IA", - "KS", - "KY", - "LA", - "ME", - "MD", - "MA", - "MI", - "MN", - "MS", - "MO", - "MT", - "NE", - "NV", - "NH", - "NJ", - "NM", - "NY", - "NC", - "ND", - "MP", - "OH", - "OK", - "OR", - "PA", - "PR", - "RI", - "SC", - "SD", - "TN", - "TX", - "TT", - "UT", - "VT", - "VA", - "VI", - "WA", - "WV", - "WI", - "WY", + "AL", + "AK", + "AZ", + "AR", + "AS", + "CA", + "CO", + "CT", + "DE", + "DC", + "FL", + "GA", + "GU", + "HI", + "ID", + "IL", + "IN", + "IA", + "KS", + "KY", + "LA", + "ME", + "MD", + "MA", + "MI", + "MN", + "MS", + "MO", + "MT", + "NE", + "NV", + "NH", + "NJ", + "NM", + "NY", + "NC", + "ND", + "MP", + "OH", + "OK", + "OR", + "PA", + "PR", + "RI", + "SC", + "SD", + "TN", + "TX", + "TT", + "UT", + "VT", + "VA", + "VI", + "WA", + "WV", + "WI", + "WY", ]; diff --git a/example/gtfs-rt.jv b/example/gtfs-rt.jv index 14d0590e..f1891d34 100644 --- a/example/gtfs-rt.jv +++ b/example/gtfs-rt.jv @@ -12,116 +12,116 @@ // to a SQLite file with multiple tables. pipeline GtfsRTSimplePipeline { - // 2. As you can see here, we have three independent - // sequences of pipes in this pipeline. - GTFSRTTripUpdateFeedExtractor - ->GtfsRTTripUpdateInterpreter - ->TripUpdateTableInterpreter - ->TripUpdateLoader; - - GTFSRTVehiclePositionFeedExtractor - ->GtfsRTVehiclePositionInterpreter - ->VehiclePositionTableInterpreter - ->VehicleLoader; - - GTFSRTAlertFeedExtractor - ->GtfsRTAlertInterpreter - ->AlertTableInterpreter - ->AlertLoader; - - // 3. We define a series of HttpExtractors that each pull data - // from an HTTP endpoint - block GTFSRTTripUpdateFeedExtractor oftype HttpExtractor { - url: "https://proxy.transport.data.gouv.fr/resource/bibus-brest-gtfs-rt-trip-update"; - } - - block GTFSRTVehiclePositionFeedExtractor oftype HttpExtractor { - url: "https://proxy.transport.data.gouv.fr/resource/bibus-brest-gtfs-rt-vehicle-position"; - } - - block GTFSRTAlertFeedExtractor oftype HttpExtractor { - url: "https://proxy.transport.data.gouv.fr/resource/bibus-brest-gtfs-rt-alerts"; - } - - // 4. In the next step, we use the domain-specific GtfsRTInterpreter - // to interpret the fetched files as sheets - block GtfsRTTripUpdateInterpreter oftype GtfsRTInterpreter { - entity: "trip_update"; - } - - block GtfsRTAlertInterpreter oftype GtfsRTInterpreter { - entity: "alert"; - } - - block GtfsRTVehiclePositionInterpreter oftype GtfsRTInterpreter { - entity: "vehicle"; - } - - // 5. Next, we interpret the sheets as tables - block TripUpdateTableInterpreter oftype TableInterpreter { - header: true; - columns: [ - "header.gtfs_realtime_version" oftype text, - "header.timestamp" oftype text, - "header.incrementality" oftype text, - "entity.id" oftype text, - "entity.trip_update.trip.trip_id" oftype text, - "entity.trip_update.trip.route_id" oftype text, - "entity.trip_update.stop_time_update.stop_sequence" oftype text, - "entity.trip_update.stop_time_update.stop_id" oftype text, - "entity.trip_update.stop_time_update.arrival.time" oftype text, - "entity.trip_update.stop_time_update.departure.time" oftype text, - ]; - } - - block VehiclePositionTableInterpreter oftype TableInterpreter { - header: true; - columns: [ - "header.gtfs_realtime_version" oftype text, - "header.timestamp" oftype text, - "header.incrementality" oftype text, - "entity.id" oftype text, - "entity.vehicle_position.vehicle_descriptor.id" oftype text, - "entity.vehicle_position.trip.trip_id" oftype text, - "entity.vehicle_position.trip.route_id" oftype text, - "entity.vehicle_position.position.latitude" oftype text, - "entity.vehicle_position.position.longitude" oftype text, - "entity.vehicle_position.timestamp" oftype text - ]; - } - - block AlertTableInterpreter oftype TableInterpreter { - header: true; - columns: [ - 'header.gtfs_realtime_version' oftype text, - 'header.timestamp' oftype text, - 'header.incrementality' oftype text, - 'entity.id' oftype text, - 'entity.alert.informed_entity.route_id' oftype text, - 'entity.alert.header_text' oftype text, - 'entity.alert.description_text' oftype text, - ]; - } - - // 6. Last, we load the tables into the same SQLite file. - // Each loader has to define a different table name. - // For working with live data, we use the property "dropTable: false" - // to append data instead of deleting the previous data. - block TripUpdateLoader oftype SQLiteLoader { - table: "gtfs-rt-trip_update"; - file: "./gtfs.sqlite"; - dropTable: false; - } - - block VehicleLoader oftype SQLiteLoader { - table: "gtfs-rt-vehicle_position"; - file: "./gtfs.sqlite"; - dropTable: false; - } - - block AlertLoader oftype SQLiteLoader { - table: "gtfs-rt-alert"; - file: "./gtfs.sqlite"; - dropTable: false; - } + // 2. As you can see here, we have three independent + // sequences of pipes in this pipeline. + GTFSRTTripUpdateFeedExtractor + ->GtfsRTTripUpdateInterpreter + ->TripUpdateTableInterpreter + ->TripUpdateLoader; + + GTFSRTVehiclePositionFeedExtractor + ->GtfsRTVehiclePositionInterpreter + ->VehiclePositionTableInterpreter + ->VehicleLoader; + + GTFSRTAlertFeedExtractor + ->GtfsRTAlertInterpreter + ->AlertTableInterpreter + ->AlertLoader; + + // 3. We define a series of HttpExtractors that each pull data + // from an HTTP endpoint + block GTFSRTTripUpdateFeedExtractor oftype HttpExtractor { + url: "https://proxy.transport.data.gouv.fr/resource/bibus-brest-gtfs-rt-trip-update"; + } + + block GTFSRTVehiclePositionFeedExtractor oftype HttpExtractor { + url: "https://proxy.transport.data.gouv.fr/resource/bibus-brest-gtfs-rt-vehicle-position"; + } + + block GTFSRTAlertFeedExtractor oftype HttpExtractor { + url: "https://proxy.transport.data.gouv.fr/resource/bibus-brest-gtfs-rt-alerts"; + } + + // 4. In the next step, we use the domain-specific GtfsRTInterpreter + // to interpret the fetched files as sheets + block GtfsRTTripUpdateInterpreter oftype GtfsRTInterpreter { + entity: "trip_update"; + } + + block GtfsRTAlertInterpreter oftype GtfsRTInterpreter { + entity: "alert"; + } + + block GtfsRTVehiclePositionInterpreter oftype GtfsRTInterpreter { + entity: "vehicle"; + } + + // 5. Next, we interpret the sheets as tables + block TripUpdateTableInterpreter oftype TableInterpreter { + header: true; + columns: [ + "header.gtfs_realtime_version" oftype text, + "header.timestamp" oftype text, + "header.incrementality" oftype text, + "entity.id" oftype text, + "entity.trip_update.trip.trip_id" oftype text, + "entity.trip_update.trip.route_id" oftype text, + "entity.trip_update.stop_time_update.stop_sequence" oftype text, + "entity.trip_update.stop_time_update.stop_id" oftype text, + "entity.trip_update.stop_time_update.arrival.time" oftype text, + "entity.trip_update.stop_time_update.departure.time" oftype text, + ]; + } + + block VehiclePositionTableInterpreter oftype TableInterpreter { + header: true; + columns: [ + "header.gtfs_realtime_version" oftype text, + "header.timestamp" oftype text, + "header.incrementality" oftype text, + "entity.id" oftype text, + "entity.vehicle_position.vehicle_descriptor.id" oftype text, + "entity.vehicle_position.trip.trip_id" oftype text, + "entity.vehicle_position.trip.route_id" oftype text, + "entity.vehicle_position.position.latitude" oftype text, + "entity.vehicle_position.position.longitude" oftype text, + "entity.vehicle_position.timestamp" oftype text + ]; + } + + block AlertTableInterpreter oftype TableInterpreter { + header: true; + columns: [ + 'header.gtfs_realtime_version' oftype text, + 'header.timestamp' oftype text, + 'header.incrementality' oftype text, + 'entity.id' oftype text, + 'entity.alert.informed_entity.route_id' oftype text, + 'entity.alert.header_text' oftype text, + 'entity.alert.description_text' oftype text, + ]; + } + + // 6. Last, we load the tables into the same SQLite file. + // Each loader has to define a different table name. + // For working with live data, we use the property "dropTable: false" + // to append data instead of deleting the previous data. + block TripUpdateLoader oftype SQLiteLoader { + table: "gtfs-rt-trip_update"; + file: "./gtfs.sqlite"; + dropTable: false; + } + + block VehicleLoader oftype SQLiteLoader { + table: "gtfs-rt-vehicle_position"; + file: "./gtfs.sqlite"; + dropTable: false; + } + + block AlertLoader oftype SQLiteLoader { + table: "gtfs-rt-alert"; + file: "./gtfs.sqlite"; + dropTable: false; + } } \ No newline at end of file diff --git a/example/gtfs-static.jv b/example/gtfs-static.jv index c8bef46e..ec0ef003 100644 --- a/example/gtfs-static.jv +++ b/example/gtfs-static.jv @@ -11,115 +11,115 @@ // to a joint SQLite file with multiple tables. pipeline GtfsPipeline { - // 2. The origin for multiple pipe sequences is a zip - // file. Each file in this zip is further processed - // by its own sequence of blocks and pipes. - GTFSSampleFeedExtractor - -> AgencyInterpreter - -> AgencyLoader; - - GTFSSampleFeedExtractor - -> CalendarDatesInterpreter - -> CalendarDatesLoader; - - GTFSSampleFeedExtractor - -> CalendarInterpreter - -> CalendarLoader; - - GTFSSampleFeedExtractor - -> FareAttributesInterpreter - -> FareAttributesLoader; - - GTFSSampleFeedExtractor - -> FareRulesInterpreter - -> FareRulesLoader; - - GTFSSampleFeedExtractor - -> FrequenciesInterpreter - -> FrequenciesLoader; - - GTFSSampleFeedExtractor - -> RoutesInterpreter - -> RoutesLoader; - - GTFSSampleFeedExtractor - -> ShapesInterpreter - -> ShapesLoader; - - GTFSSampleFeedExtractor - -> StopTimesInterpreter - -> StopTimesLoader; - - GTFSSampleFeedExtractor - -> StopsInterpreter - -> StopsLoader; - - GTFSSampleFeedExtractor - -> TripsInterpreter - -> TripsLoader; - - // 3. As a first step, we download the zip file and interpret it. - block GTFSSampleFeedExtractor oftype GTFSExtractor { - url: "https://developers.google.com/static/transit/gtfs/examples/sample-feed.zip"; - } - - // 4. Next, interpret the zip files contents according to the different elements - // from the GTFS standard. - block AgencyInterpreter oftype GTFSAgencyInterpreter { } - block CalendarDatesInterpreter oftype GTFSCalendarDatesInterpreter { } - block CalendarInterpreter oftype GTFSCalendarInterpreter { } - block FareAttributesInterpreter oftype GTFSFareAttributesInterpreter { } - block FareRulesInterpreter oftype GTFSFareRulesInterpreter { } - block FrequenciesInterpreter oftype GTFSFrequenciesInterpreter { } - block RoutesInterpreter oftype GTFSRoutesInterpreter { } - block ShapesInterpreter oftype GTFSShapesInterpreter { } - block StopTimesInterpreter oftype GTFSStopTimesInterpreter { } - block StopsInterpreter oftype GTFSStopsInterpreter { } - block TripsInterpreter oftype GTFSTripsInterpreter { } - - // 5. Finally, write the interpreted tables into a SQLite database - block AgencyLoader oftype SQLiteLoader { - table: "agency"; - file: "./gtfs.sqlite"; - } - block CalendarDatesLoader oftype SQLiteLoader { - table: "calendar_dates"; - file: "./gtfs.sqlite"; - } - block CalendarLoader oftype SQLiteLoader { - table: "calendar"; - file: "./gtfs.sqlite"; - } - block FareAttributesLoader oftype SQLiteLoader { - table: "fare_attributes"; - file: "./gtfs.sqlite"; - } - block FareRulesLoader oftype SQLiteLoader { - table: "fare_rules"; - file: "./gtfs.sqlite"; - } - block FrequenciesLoader oftype SQLiteLoader { - table: "frequencies"; - file: "./gtfs.sqlite"; - } - block RoutesLoader oftype SQLiteLoader { - table: "routes"; - file: "./gtfs.sqlite"; - } - block ShapesLoader oftype SQLiteLoader { - table: "shapes"; - file: "./gtfs.sqlite"; - } - block StopTimesLoader oftype SQLiteLoader { - table: "stop_times"; - file: "./gtfs.sqlite"; - } - block StopsLoader oftype SQLiteLoader { - table: "stops"; - file: "./gtfs.sqlite"; - } - block TripsLoader oftype SQLiteLoader { - table: "trips"; - file: "./gtfs.sqlite"; - } + // 2. The origin for multiple pipe sequences is a zip + // file. Each file in this zip is further processed + // by its own sequence of blocks and pipes. + GTFSSampleFeedExtractor + -> AgencyInterpreter + -> AgencyLoader; + + GTFSSampleFeedExtractor + -> CalendarDatesInterpreter + -> CalendarDatesLoader; + + GTFSSampleFeedExtractor + -> CalendarInterpreter + -> CalendarLoader; + + GTFSSampleFeedExtractor + -> FareAttributesInterpreter + -> FareAttributesLoader; + + GTFSSampleFeedExtractor + -> FareRulesInterpreter + -> FareRulesLoader; + + GTFSSampleFeedExtractor + -> FrequenciesInterpreter + -> FrequenciesLoader; + + GTFSSampleFeedExtractor + -> RoutesInterpreter + -> RoutesLoader; + + GTFSSampleFeedExtractor + -> ShapesInterpreter + -> ShapesLoader; + + GTFSSampleFeedExtractor + -> StopTimesInterpreter + -> StopTimesLoader; + + GTFSSampleFeedExtractor + -> StopsInterpreter + -> StopsLoader; + + GTFSSampleFeedExtractor + -> TripsInterpreter + -> TripsLoader; + + // 3. As a first step, we download the zip file and interpret it. + block GTFSSampleFeedExtractor oftype GTFSExtractor { + url: "https://developers.google.com/static/transit/gtfs/examples/sample-feed.zip"; + } + + // 4. Next, interpret the zip files contents according to the different elements + // from the GTFS standard. + block AgencyInterpreter oftype GTFSAgencyInterpreter { } + block CalendarDatesInterpreter oftype GTFSCalendarDatesInterpreter { } + block CalendarInterpreter oftype GTFSCalendarInterpreter { } + block FareAttributesInterpreter oftype GTFSFareAttributesInterpreter { } + block FareRulesInterpreter oftype GTFSFareRulesInterpreter { } + block FrequenciesInterpreter oftype GTFSFrequenciesInterpreter { } + block RoutesInterpreter oftype GTFSRoutesInterpreter { } + block ShapesInterpreter oftype GTFSShapesInterpreter { } + block StopTimesInterpreter oftype GTFSStopTimesInterpreter { } + block StopsInterpreter oftype GTFSStopsInterpreter { } + block TripsInterpreter oftype GTFSTripsInterpreter { } + + // 5. Finally, write the interpreted tables into a SQLite database + block AgencyLoader oftype SQLiteLoader { + table: "agency"; + file: "./gtfs.sqlite"; + } + block CalendarDatesLoader oftype SQLiteLoader { + table: "calendar_dates"; + file: "./gtfs.sqlite"; + } + block CalendarLoader oftype SQLiteLoader { + table: "calendar"; + file: "./gtfs.sqlite"; + } + block FareAttributesLoader oftype SQLiteLoader { + table: "fare_attributes"; + file: "./gtfs.sqlite"; + } + block FareRulesLoader oftype SQLiteLoader { + table: "fare_rules"; + file: "./gtfs.sqlite"; + } + block FrequenciesLoader oftype SQLiteLoader { + table: "frequencies"; + file: "./gtfs.sqlite"; + } + block RoutesLoader oftype SQLiteLoader { + table: "routes"; + file: "./gtfs.sqlite"; + } + block ShapesLoader oftype SQLiteLoader { + table: "shapes"; + file: "./gtfs.sqlite"; + } + block StopTimesLoader oftype SQLiteLoader { + table: "stop_times"; + file: "./gtfs.sqlite"; + } + block StopsLoader oftype SQLiteLoader { + table: "stops"; + file: "./gtfs.sqlite"; + } + block TripsLoader oftype SQLiteLoader { + table: "trips"; + file: "./gtfs.sqlite"; + } } \ No newline at end of file diff --git a/example/workbooks-xlsx.jv b/example/workbooks-xlsx.jv index 7e7cbf44..a50b5b08 100644 --- a/example/workbooks-xlsx.jv +++ b/example/workbooks-xlsx.jv @@ -10,88 +10,88 @@ // from a XLSX file with multiple Sheets in the web // to a SQLite file sink. pipeline LightTrappingSiliconSolarCellsPipeline { - // 2. We directly get the xlsx file from the web via the HttpExtractor - // The data is provided under CC BY-SA 4.0 - // Saive, Rebecca (2023). Data supporting the publication: - // Light trapping in thin silicon solar cells: a review on fundamentals and technologies. - // 4TU.ResearchData. Dataset. https://doi.org/10.4121/14554815.v1 - block LightTrappingSiliconSolarCellsExtractor oftype HttpExtractor { - url: "https://figshare.com/ndownloader/files/27923598"; - } + // 2. We directly get the xlsx file from the web via the HttpExtractor + // The data is provided under CC BY-SA 4.0 + // Saive, Rebecca (2023). Data supporting the publication: + // Light trapping in thin silicon solar cells: a review on fundamentals and technologies. + // 4TU.ResearchData. Dataset. https://doi.org/10.4121/14554815.v1 + block LightTrappingSiliconSolarCellsExtractor oftype HttpExtractor { + url: "https://figshare.com/ndownloader/files/27923598"; + } - // 3. The incoming file is interpreted as a XLSX file and transformed into a Workbook - // Workbooks contain at least 1 Sheet. Every sheet has a unique name. - block LightTrappingSiliconSolarCellsTextXLSXInterpreter oftype XLSXInterpreter { } + // 3. The incoming file is interpreted as a XLSX file and transformed into a Workbook + // Workbooks contain at least 1 Sheet. Every sheet has a unique name. + block LightTrappingSiliconSolarCellsTextXLSXInterpreter oftype XLSXInterpreter { } - // 4.1 Here, we pick one sheet with the name 'RefractiveIndexSi GaAs' from the Workbook to use within our pipeline. - // The output type from SheetPicker is Sheet, which was already introduced in the cars example - block LightTrappingSiliconSolarCellsSheetpicker oftype SheetPicker { - sheetName: 'RefractiveIndexSi GaAs'; - } + // 4.1 Here, we pick one sheet with the name 'RefractiveIndexSi GaAs' from the Workbook to use within our pipeline. + // The output type from SheetPicker is Sheet, which was already introduced in the cars example + block LightTrappingSiliconSolarCellsSheetpicker oftype SheetPicker { + sheetName: 'RefractiveIndexSi GaAs'; + } - block NameHeaderWriter oftype CellWriter { - at: range F1:L1; - write: [ - "F", - "G", - "nm", - "wl", - "n2", - "k2", - "alpha (cm-1)2" - ]; - } + block NameHeaderWriter oftype CellWriter { + at: range F1:L1; + write: [ + "F", + "G", + "nm", + "wl", + "n2", + "k2", + "alpha (cm-1)2" + ]; + } - block LightTrappingSiliconSolarCellsTableInterpreter oftype TableInterpreter { - header: true; - columns: [ - "Wavelength" oftype integer, - "Wavelength (µm)" oftype decimal, - "n" oftype decimal, - "k" oftype text, - "alpha (cm-1)" oftype text, - "nm" oftype decimal, - "n2" oftype text, - "k2" oftype decimal, - "alpha (cm-1)2" oftype decimal - ]; - } + block LightTrappingSiliconSolarCellsTableInterpreter oftype TableInterpreter { + header: true; + columns: [ + "Wavelength" oftype integer, + "Wavelength (µm)" oftype decimal, + "n" oftype decimal, + "k" oftype text, + "alpha (cm-1)" oftype text, + "nm" oftype decimal, + "n2" oftype text, + "k2" oftype decimal, + "alpha (cm-1)2" oftype decimal + ]; + } - block LightTrappingSiliconSolarCellsLoader oftype SQLiteLoader { - table: "LightTrappingSiliconSolarCells"; - file: "./LightTrappingSiliconSolarCells.sqlite"; - } + block LightTrappingSiliconSolarCellsLoader oftype SQLiteLoader { + table: "LightTrappingSiliconSolarCells"; + file: "./LightTrappingSiliconSolarCells.sqlite"; + } - // 4.2 Here, we pick another sheet named 'Wavelength thickness trapping' from the Workbook - block SecondLightTrappingSiliconSolarCellsSheetpicker oftype SheetPicker { - sheetName: 'Wavelength thickness trapping'; - } + // 4.2 Here, we pick another sheet named 'Wavelength thickness trapping' from the Workbook + block SecondLightTrappingSiliconSolarCellsSheetpicker oftype SheetPicker { + sheetName: 'Wavelength thickness trapping'; + } - block SecondLightTrappingSiliconSolarCellsTableInterpreter oftype TableInterpreter { - header: true; - columns: [ - "n" oftype decimal, - "Wavelength (µm)" oftype decimal, - ]; - } + block SecondLightTrappingSiliconSolarCellsTableInterpreter oftype TableInterpreter { + header: true; + columns: [ + "n" oftype decimal, + "Wavelength (µm)" oftype decimal, + ]; + } - block SecondLightTrappingSiliconSolarCellsLoader oftype SQLiteLoader { + block SecondLightTrappingSiliconSolarCellsLoader oftype SQLiteLoader { - table: "SecondLightTrappingSiliconSolarCells"; - file: "./LightTrappingSiliconSolarCells.sqlite"; - } + table: "SecondLightTrappingSiliconSolarCells"; + file: "./LightTrappingSiliconSolarCells.sqlite"; + } - LightTrappingSiliconSolarCellsExtractor - -> LightTrappingSiliconSolarCellsTextXLSXInterpreter - -> LightTrappingSiliconSolarCellsSheetpicker - -> NameHeaderWriter - -> LightTrappingSiliconSolarCellsTableInterpreter - -> LightTrappingSiliconSolarCellsLoader; - - // 5. Once the XLSX file is interpreted, we can split the pipeline and - // work separately on the different sheets from our input file - LightTrappingSiliconSolarCellsTextXLSXInterpreter - -> SecondLightTrappingSiliconSolarCellsSheetpicker - -> SecondLightTrappingSiliconSolarCellsTableInterpreter - -> SecondLightTrappingSiliconSolarCellsLoader; + LightTrappingSiliconSolarCellsExtractor + -> LightTrappingSiliconSolarCellsTextXLSXInterpreter + -> LightTrappingSiliconSolarCellsSheetpicker + -> NameHeaderWriter + -> LightTrappingSiliconSolarCellsTableInterpreter + -> LightTrappingSiliconSolarCellsLoader; + + // 5. Once the XLSX file is interpreted, we can split the pipeline and + // work separately on the different sheets from our input file + LightTrappingSiliconSolarCellsTextXLSXInterpreter + -> SecondLightTrappingSiliconSolarCellsSheetpicker + -> SecondLightTrappingSiliconSolarCellsTableInterpreter + -> SecondLightTrappingSiliconSolarCellsLoader; } \ No newline at end of file