In einem der Projekte wurde es notwendig, die Prozesse zum Import von Daten aus Drittsystemen auf eine Microservice-Architektur umzustellen. Als Werkzeug wurde Apache NiFi ausgewählt. Als erster Testfall diente der Import des Einheitlichen Staatsregisters der juristischen Personen (USRLE) des Föderalen Steuerdienstes.
Im vorherigen Artikel wurde eine Möglichkeit zum Konvertieren von XML in JSON mithilfe des AVRO-Schemas beschrieben.
Dieser Artikel beschreibt eine Möglichkeit zum Konvertieren von JSON mithilfe der JOLT-Spezifikation.
Verwendete Prozessoren und Controller
JSON in Teile teilen
Die in der vorherigen Phase erhaltene FlowFile enthält ein JSON mit einer Reihe von USRLE-Auszügen für verschiedene Organisationen. Teilen wir es zunächst auf, sodass jede FlowFile genau einen Auszug enthält.
Dafür verwenden wir den SplitJson-Prozessor. In den Einstellungen müssen Sie einen JsonPath-Ausdruck angeben, nach dem das JSON in Teile aufgeteilt wird. In diesem Fall ist das $.*
JsonPath-Dokumentation hier
Sie können hier üben
JSON-Konvertierung
Das resultierende JSON hat für die spätere Speicherung und Verarbeitung eine unnötig komplexe Struktur. Es ist besser, die Adresse und den vollständigen Namen jeweils zu einer einzigen Zeichenkette zusammenzufassen und einige Elemente in der Hierarchie nach oben zu verschieben.
JSON vor der Transformation
{
"reportDate" : "2020-05-20",
"ogrn" : "1234567890123",
"ogrnDate" : "2002-12-30",
"inn" : "1234567890",
"kpp" : "123456789",
"opfCode" : "12300",
"opfName" : " ",
"name" : {
"fullName" : " ",
"shortName" : ""
},
"address" : {
"addressRF" : {
"region" : {
"type" : "",
"name" : ""
},
"district" : null,
"town" : {
"type" : "",
"name" : ""
},
"settlement" : null,
"street" : {
"type" : "",
"name" : ""
},
"index" : "143500",
"regionCode" : "50",
"kladr" : "500000570000011",
"house" : null,
"building" : null,
"apartment" : null
}
},
"termination" : null,
"capital" : null,
"manageOrg" : null,
"director" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012"
},
"position" : {
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"name" : " "
},
"disqualification" : null
} ],
"founders" : {
"founderULRF" : null,
"founderULForeign" : null,
"founderFL" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012"
},
"capitalPart" : {
"nominal" : 20000.0,
"size" : {
"percent" : 50.0,
"decimalPart" : null,
"simplePart" : null
}
}
}, {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789021"
},
"capitalPart" : {
"nominal" : 20000.0,
"size" : {
"percent" : 50.0,
"decimalPart" : null,
"simplePart" : null
}
}
} ],
"founderGov" : null,
"founderPIF" : null
},
"capitalPart" : null,
"holderReestrAO" : null,
"okved" : {
"mainOkved" : {
"code" : "47.11",
"name" : " , , "
},
"addOkved" : null
}
}
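Zur Transformation des JSON verwenden wir den Prozessor JoltTransformJSON.
Einstellungen:
Jolt Transformation DSL - die Art der Transformation. Wir wählen Chain - eine Kette aus mehreren Operationen.
Jolt Specification - die eigentliche Transformationsspezifikation.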
JOLT
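In der Spezifikation werden zwei Arten von Operationen verwendet - shift und modify-overwrite-beta. Die verfügbaren Funktionen von modify-overwrite-beta sind im Quellcode von Modifier.java beschrieben. Ausprobieren lässt sich eine Spezifikation auf jolt-demo.appspot.com; dort gibt es auch Beispiele.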
JOLT
[
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"region": "=concat(@(type), ' ', @(name))",
"district": "=concat(@(type), ' ', @(name))",
"town": "=concat(@(type), ' ', @(name))",
"settlement": "=concat(@(type), ' ', @(name))",
"street": "=concat(@(type), ' ', @(name))"
}
},
"director": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founders": {
"founderFL": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founderGov": {
"*": {
"founderImplFL": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
}
}
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
"fias": null
}
}
}
},
{
"operation": "shift",
"spec": {
"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
"name": {
"*": "&"
},
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
},
"termination": {
"method": {
"*": "&2.&"
},
"*": "&1.&"
},
"capital": "&",
"manageOrg": {
"egrulData": {
"*": "&2.&"
}
},
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
},
"founders": {
"founderULRF|founderULForeign": {
"*": {
"egrulData|foreignReg": {
"*": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderFL": {
"*": {
"fl": {
"fio|inn": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderGov": {
"*": {
"govOrg": {
"*": "&4.&3[&2].&"
},
"capitalPart": "&3.&2[&1].&",
"founderImplUL": {
"egrulData": {
"*": "&5.&4[&3].&2.&"
}
},
"founderImplFL": {
"fl": {
"fio|inn": "&5.&4[&3].&2.&"
}
}
}
},
"founderPIF": {
"*": {
"PIFName": {
"name": "&4.&3[&2].&1"
},
"manageOrg": {
"egrulData": {
"*": "&5.&4[&3].&"
}
},
"capitalPart": "&3.&2[&1].&"
}
}
},
"capitalPart": "&",
"holderReestrAO": {
"egrulData": {
"*": "&2.&"
}
},
"okved": "&"
}
}
]
modify-overwrite-beta , .. .
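Wie man sieht, besteht die Spezifikation aus drei Operationen: zwei vom Typ modify-overwrite-beta und einer vom Typ shift. Jede Operation wird durch die Elemente operation (Typ der Operation) und spec (ihre Spezifikation) beschrieben.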
, .
modify-overwrite-beta
. , . , .
.
Die erste Operation (modify-overwrite-beta) verkettet unter anderem type und name für die Elemente region, district, town, settlement und street. Dazu dient der Ausdruck "=concat(@(type), ' ', @(name))"
.
"address": {
"addressRF": {
"region": "=concat(@(type), ' ', @(name))",
"district": "=concat(@(type), ' ', @(name))",
"town": "=concat(@(type), ' ', @(name))",
"settlement": "=concat(@(type), ' ', @(name))",
"street": "=concat(@(type), ' ', @(name))"
}
}
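Betrachten wir als Beispiel die Regel "region": "=concat(@(type), ' ', @(name))",
Sie bedeutet: Das Element region wird durch die Verkettung seiner Unterelemente type und name ersetzt. Da sich der Ausdruck auf region selbst bezieht, adressiert @(type)
das Unterelement type innerhalb von region.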
Die zweite Operation (ebenfalls modify-overwrite-beta) bildet aus den Adressbestandteilen das Element value.
"address": {
"addressRF": {
"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
"fias": null
}
}
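Zu beachten ist hier die Schreibweise @(1,index)
Die 1 bedeutet, dass das Element index eine Ebene höher gesucht wird. Das heißt: value liegt in addressRF, also wird in addressRF nach index gesucht.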
, =
concat
, @(1,index)
.
fias - dieses Element wird neu angelegt und mit null belegt.
. shift .
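Für den vollständigen Namen wird zusätzlich der Platzhalter "*"
verwendet. Er ist nötig, weil director ein Array ist und die Regel auf jedes seiner Elemente angewendet werden soll.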
"director": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
}
shift
shift JSON.
JSON
{
"reportDate" : "2020-05-20",
"ogrn" : "1234567890123",
"ogrnDate" : "2002-12-30",
"inn" : "1234567890",
"kpp" : "123456789",
"opfCode" : "12300",
"opfName" : " ",
"name" : {
"fullName" : " ",
"shortName" : ""
},
"address" : {
"addressRF" : {
"region" : " ",
"district" : " ",
"town" : " ",
"settlement" : " ",
"street" : " ",
"index" : "143500",
"regionCode" : "50",
"kladr" : "500000570000011",
"house" : null,
"building" : null,
"apartment" : null,
"value" : "143500, , , , , , , , ",
"fias" : null
}
},
"termination" : null,
"capital" : null,
"manageOrg" : null,
"director" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012",
"fio" : " "
},
"position" : {
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"name" : " "
},
"disqualification" : null
} ],
"founders" : {
"founderULRF" : null,
"founderULForeign" : null,
"founderFL" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012",
"fio" : " "
},
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
}, {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789021",
"fio" : " "
},
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
} ],
"founderGov" : null,
"founderPIF" : null
},
"capitalPart" : null,
"holderReestrAO" : null,
"okved" : {
"mainOkved" : {
"code" : "47.11",
"name" : " , , "
},
"addOkved" : null
}
}
, - , , , . , modify-overwrite-beta , . , shift - , .
shift
{
"operation": "shift",
"spec": {
"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
"name": {
"*": "&"
},
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
},
"termination": {
"method": {
"*": "&2.&"
},
"*": "&1.&"
},
"capital": "&",
"manageOrg": {
"egrulData": {
"*": "&2.&"
}
},
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
},
"founders": {
"founderULRF|founderULForeign": {
"*": {
"egrulData|foreignReg": {
"*": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderFL": {
"*": {
"fl": {
"fio|inn": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderGov": {
"*": {
"govOrg": {
"*": "&4.&3[&2].&"
},
"capitalPart": "&3.&2[&1].&",
"founderImplUL": {
"egrulData": {
"*": "&5.&4[&3].&2.&"
}
},
"founderImplFL": {
"fl": {
"fio|inn": "&5.&4[&3].&2.&"
}
}
}
},
"founderPIF": {
"*": {
"PIFName": {
"name": "&4.&3[&2].&1"
},
"manageOrg": {
"egrulData": {
"*": "&5.&4[&3].&"
}
},
"capitalPart": "&3.&2[&1].&"
}
}
},
"capitalPart": "&",
"holderReestrAO": {
"egrulData": {
"*": "&2.&"
}
},
"okved": "&"
}
}
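Die Operation shift verschiebt Elemente aus dem Eingabe-JSON in das Ergebnis-JSON. Auf der linken Seite steht der Schlüssel im Eingabe-JSON, auf der rechten Seite der Pfad, unter dem der Wert im Ergebnis abgelegt wird. Das Zeichen & steht dabei für den Namen des aktuell getroffenen Schlüssels und ist gleichbedeutend mit &0. Der Schlüssel eine Ebene höher wird mit &1 angesprochen, der nächste mit &2 usw. Das Zeichen & lässt sich auch mit festem Text kombinieren, zum Beispiel pre-&-post: Steht & für name, ergibt sich pre-name-post.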
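Die erste Regel - "reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&"
- übernimmt die aufgezählten Elemente unverändert auf die oberste Ebene des Ergebnisses. Das Zeichen |
bedeutet dabei "oder".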
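Die Elemente fullName und shortName werden mit der Regel "name": { "*": "&" }
auf die oberste Ebene gehoben.
"*"
passt auf alle Unterelemente von name
.
"&"
legt jedes dieser Unterelemente unter seinem eigenen Namen in der Wurzel des Ergebnisses ab.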
- .
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
}
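Hier werden nur die aufgezählten Elemente übernommen. Der Zielpfad lautet "&2.&"
Gezählt wird vom aktuellen Schlüssel aus nach oben: &2
ist address, &
ist der Name des Elements selbst. &1
wäre addressRF; diese Ebene wird übersprungen. Es entstehen also die Pfade address.kladr, address.regionCode, address.value und address.fias. Alle übrigen Elemente von addressRF gelangen nicht in das Ergebnis-JSON.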
"director" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012",
"fio" : " "
},
"position" : {
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"name" : " "
},
"disqualification" : null
} ]
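Die Elemente lastName, firstName und patronymic werden entfernt.
inn und fio werden eine Ebene nach oben verschoben.
ogrnip, typeCode und typeName werden ebenfalls eine Ebene nach oben verschoben.
name wird in position umbenannt.
disqualification bleibt unverändert.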
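Da director ein Array ist, muss im Zielpfad auch der Index des Array-Elements angegeben werden. Dazu wird & in eckige Klammern gesetzt
- [&]
.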
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
}
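Zuerst werden fio und inn verschoben. Der Zielpfad lautet &3[&2].&
Gezählt wird vom aktuellen Schlüssel aus nach oben: &3
- director, [&2]
- der Index des Array-Elements, &
- der Name des Elements selbst, also fio bzw. inn.
Danach wird name nach position verschoben. &3
- director, [&2]
- der Index des Array-Elements, &1
- position. &
wird hier nicht verwendet, weil das Element im Ergebnis nicht name, sondern position heißen soll.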
Die übrigen Elemente von position werden einfach eine Ebene nach oben verschoben. Das Element disqualification bleibt unverändert.
Im Weiteren werden analoge Konstruktionen verwendet.
Beispiel
Zum Abschluss folgen noch einmal das ursprüngliche JSON, die JOLT-Spezifikation und das resultierende JSON im Ganzen.
Ursprüngliches JSON
{
"reportDate": "2020-05-20",
"ogrn": "1234567890123",
"ogrnDate": "2002-12-30",
"inn": "1234567890",
"kpp": "123456789",
"opfCode": "12300",
"opfName": " ",
"name": {
"fullName": " ",
"shortName": ""
},
"address": {
"addressRF": {
"region": {
"type": "",
"name": ""
},
"district": null,
"town": {
"type": "",
"name": ""
},
"settlement": null,
"street": {
"type": "",
"name": ""
},
"index": "143500",
"regionCode": "50",
"kladr": "500000570000011",
"house": null,
"building": null,
"apartment": null
}
},
"termination": null,
"capital": null,
"manageOrg": null,
"director": [
{
"fl": {
"lastName": "",
"firstName": "",
"patronymic": "",
"inn": "123456789012"
},
"position": {
"ogrnip": null,
"typeCode": "02",
"typeName": " ",
"name": " "
},
"disqualification": null
}
],
"founders": {
"founderULRF": null,
"founderULForeign": null,
"founderFL": [
{
"fl": {
"lastName": "",
"firstName": "",
"patronymic": "",
"inn": "123456789012"
},
"capitalPart": {
"nominal": 20000,
"size": {
"percent": 50,
"decimalPart": null,
"simplePart": null
}
}
},
{
"fl": {
"lastName": "",
"firstName": "",
"patronymic": "",
"inn": "123456789021"
},
"capitalPart": {
"nominal": 20000,
"size": {
"percent": 50,
"decimalPart": null,
"simplePart": null
}
}
}
],
"founderGov": null,
"founderPIF": null
},
"capitalPart": null,
"holderReestrAO": null,
"okved": {
"mainOkved": {
"code": "47.11",
"name": " , , "
},
"addOkved": null
}
}
JOLT-Spezifikation
[
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"region": "=concat(@(type), ' ', @(name))",
"district": "=concat(@(type), ' ', @(name))",
"town": "=concat(@(type), ' ', @(name))",
"settlement": "=concat(@(type), ' ', @(name))",
"street": "=concat(@(type), ' ', @(name))"
}
},
"director": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founders": {
"founderFL": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founderGov": {
"*": {
"founderImplFL": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
}
}
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
"fias": null
}
}
}
},
{
"operation": "shift",
"spec": {
"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
"name": {
"*": "&"
},
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
},
"termination": {
"method": {
"*": "&2.&"
},
"*": "&1.&"
},
"capital": "&",
"manageOrg": {
"egrulData": {
"*": "&2.&"
}
},
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
},
"founders": {
"founderULRF|founderULForeign": {
"*": {
"egrulData|foreignReg": {
"*": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderFL": {
"*": {
"fl": {
"fio|inn": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderGov": {
"*": {
"govOrg": {
"*": "&4.&3[&2].&"
},
"capitalPart": "&3.&2[&1].&",
"founderImplUL": {
"egrulData": {
"*": "&5.&4[&3].&2.&"
}
},
"founderImplFL": {
"fl": {
"fio|inn": "&5.&4[&3].&2.&"
}
}
}
},
"founderPIF": {
"*": {
"PIFName": {
"name": "&4.&3[&2].&1"
},
"manageOrg": {
"egrulData": {
"*": "&5.&4[&3].&"
}
},
"capitalPart": "&3.&2[&1].&"
}
}
},
"capitalPart": "&",
"holderReestrAO": {
"egrulData": {
"*": "&2.&"
}
},
"okved": "&"
}
}
]
Resultierendes JSON
{
"reportDate" : "2020-05-20",
"ogrn" : "1234567890123",
"ogrnDate" : "2002-12-30",
"inn" : "1234567890",
"kpp" : "123456789",
"opfCode" : "12300",
"opfName" : " ",
"fullName" : " ",
"shortName" : "",
"address" : {
"kladr" : "500000570000011",
"regionCode" : "50",
"value" : "143500, , , , , , , , ",
"fias" : null
},
"capital" : null,
"director" : [ {
"fio" : " ",
"inn" : "123456789012",
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"position" : " ",
"disqualification" : null
} ],
"founders" : {
"founderFL" : [ {
"fio" : " ",
"inn" : "123456789012",
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
}, {
"fio" : " ",
"inn" : "123456789021",
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
} ]
},
"capitalPart" : null,
"okved" : {
"mainOkved" : {
"code" : "47.11",
"name" : " , , "
},
"addOkved" : null
}
}
Wie es weitergeht
Anschließend muss das resultierende JSON zur Speicherung und weiteren Verwendung irgendwo abgelegt werden. Das geht jedoch über den Rahmen dieses Artikels hinaus - hier wählt jeder, was für ihn am bequemsten ist.