Import des Einheitlichen Staatsregisters der juristischen Personen des Bundessteuerdienstes mit Apache NiFi. Schritt 3 - Konvertieren von JSON mit JOLT

In einem unserer Projekte wurde es notwendig, die Prozesse zum Importieren von Daten aus Drittsystemen auf eine Microservice-Architektur umzustellen. Als Werkzeug wurde Apache NiFi ausgewählt. Als erster Versuch wurde der Import des Einheitlichen Staatsregisters der juristischen Personen des Bundessteuerdienstes gewählt.





Im vorherigen Artikel wurde eine Möglichkeit zum Konvertieren von XML in JSON mithilfe des AVRO-Schemas beschrieben.





Dieser Artikel beschreibt eine Möglichkeit zum Konvertieren von JSON mithilfe der JOLT-Spezifikation.





Verwendete Prozessoren und Controller

JSON in Teile teilen

Das im vorherigen Schritt erhaltene FlowFile enthält JSON mit einer Reihe von Auszügen aus dem Einheitlichen Staatsregister der juristischen Personen für verschiedene Organisationen. Teilen wir es zunächst so auf, dass jedes FlowFile genau einen Auszug enthält.





Dafür verwenden wir den SplitJson-Prozessor. In den Einstellungen muss ein JsonPath-Ausdruck angegeben werden, nach dem das JSON in Teile zerlegt wird. In diesem Fall: $.*





JsonPath-Dokumentation hier





Sie können hier üben





JSON-Konvertierung

Das resultierende JSON hat eine Struktur, die für die spätere Speicherung und Verarbeitung unnötig komplex ist. Es ist besser, die Adresse sowie den vollständigen Personennamen (fio) jeweils zu einer Zeichenkette zusammenzufassen und einige Elemente in der Hierarchie nach oben zu verschieben.





JSON vor der Transformation
{
  "reportDate" : "2020-05-20",
  "ogrn" : "1234567890123",
  "ogrnDate" : "2002-12-30",
  "inn" : "1234567890",
  "kpp" : "123456789",
  "opfCode" : "12300",
  "opfName" : "   ",
  "name" : {
    "fullName" : "   ",
    "shortName" : ""
  },
  "address" : {
    "addressRF" : {
      "region" : {
        "type" : "",
        "name" : ""
      },
      "district" : null,
      "town" : {
        "type" : "",
        "name" : ""
      },
      "settlement" : null,
      "street" : {
        "type" : "",
        "name" : ""
      },
      "index" : "143500",
      "regionCode" : "50",
      "kladr" : "500000570000011",
      "house" : null,
      "building" : null,
      "apartment" : null
    }
  },
  "termination" : null,
  "capital" : null,
  "manageOrg" : null,
  "director" : [ {
    "fl" : {
      "lastName" : "",
      "firstName" : "",
      "patronymic" : "",
      "inn" : "123456789012"
    },
    "position" : {
      "ogrnip" : null,
      "typeCode" : "02",
      "typeName" : "  ",
      "name" : " "
    },
    "disqualification" : null
  } ],
  "founders" : {
    "founderULRF" : null,
    "founderULForeign" : null,
    "founderFL" : [ {
      "fl" : {
        "lastName" : "",
        "firstName" : "",
        "patronymic" : "",
        "inn" : "123456789012"
      },
      "capitalPart" : {
        "nominal" : 20000.0,
        "size" : {
          "percent" : 50.0,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    }, {
      "fl" : {
        "lastName" : "",
        "firstName" : "",
        "patronymic" : "",
        "inn" : "123456789021"
      },
      "capitalPart" : {
        "nominal" : 20000.0,
        "size" : {
          "percent" : 50.0,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    } ],
    "founderGov" : null,
    "founderPIF" : null
  },
  "capitalPart" : null,
  "holderReestrAO" : null,
  "okved" : {
    "mainOkved" : {
      "code" : "47.11",
      "name" : "    ,  ,      "
    },
    "addOkved" : null
  }
}
      
      



Zum Konvertieren des JSON verwenden wir den Prozessor JoltTransformJSON.





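Einstellungen:





  • Jolt Transformation DSL - die Art der Transformation. Chain - eine Kette von Transformationen





  • Jolt Specification - die Spezifikation der Transformation.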





JOLT

, - .





.





- shift - modify-overwrite-beta. . Modifier.java, . jolt-demo.appspot.com . .





JOLT
[
	{
		"operation": "modify-overwrite-beta",
		"spec": {
			"address": {
				"addressRF": {
					"region": "=concat(@(type), ' ', @(name))",
					"district": "=concat(@(type), ' ', @(name))",
					"town": "=concat(@(type), ' ', @(name))",
					"settlement": "=concat(@(type), ' ', @(name))",
					"street": "=concat(@(type), ' ', @(name))"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
					}
				}
			},
			"founders": {
				"founderFL": {
					"*": {
						"fl": {
							"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
						}
					}
				},
				"founderGov": {
					"*": {
						"founderImplFL": {
							"fl": {
								"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
							}
						}
					}
				}
			}
		}
	},
	{
		"operation": "modify-overwrite-beta",
		"spec": {
			"address": {
				"addressRF": {
					"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
					"fias": null
				}
			}
		}
	},
	{
		"operation": "shift",
		"spec": {
			"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
			"name": {
				"*": "&"
			},
			"address": {
				"addressRF": {
					"kladr|regionCode|value|fias": "&2.&"
				}
			},
			"termination": {
				"method": {
					"*": "&2.&"
				},
				"*": "&1.&"
			},
			"capital": "&",
			"manageOrg": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio|inn": "&3[&2].&"
					},
					"position": {
						"name": "&3[&2].&1",
						"*": "&3[&2].&"
					},
					"disqualification": "&2[&1].&"
				}
			},
			"founders": {
				"founderULRF|founderULForeign": {
					"*": {
						"egrulData|foreignReg": {
							"*": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderFL": {
					"*": {
						"fl": {
							"fio|inn": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderGov": {
					"*": {
						"govOrg": {
							"*": "&4.&3[&2].&"
						},
						"capitalPart": "&3.&2[&1].&",
						"founderImplUL": {
							"egrulData": {
								"*": "&5.&4[&3].&2.&"
							}
						},
						"founderImplFL": {
							"fl": {
								"fio|inn": "&5.&4[&3].&2.&"
							}
						}
					}
				},
				"founderPIF": {
					"*": {
						"PIFName": {
							"name": "&4.&3[&2].&1"
						},
						"manageOrg": {
							"egrulData": {
								"*": "&5.&4[&3].&"
							}
						},
						"capitalPart": "&3.&2[&1].&"
					}
				}
			},
			"capitalPart": "&",
			"holderReestrAO": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"okved": "&"
		}
	}
]
      
      



modify-overwrite-beta , .. .





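Die Spezifikation besteht aus drei Blöcken: zwei Blöcken modify-overwrite-beta und einem Block shift. Jeder Block besteht aus zwei Elementen - operation und spec.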





, .





modify-overwrite-beta

. , . , .





.





Die erste Operation (der erste Block modify-overwrite-beta) verkettet die Felder type und name der Elemente region, district, town, settlement und street mit dem Ausdruck "=concat(@(type), ' ', @(name))".





"address": {
				"addressRF": {
					"region": "=concat(@(type), ' ', @(name))",
					"district": "=concat(@(type), ' ', @(name))",
					"town": "=concat(@(type), ' ', @(name))",
					"settlement": "=concat(@(type), ' ', @(name))",
					"street": "=concat(@(type), ' ', @(name))"
				}
			}
      
      



. , "region": "=concat(@(type), ' ', @(name))",



: region, type name. region, @(type)



.





Die zweite Operation (der zweite Block modify-overwrite-beta) fasst die Adressbestandteile zu einem neuen Element value zusammen.





"address": {
				"addressRF": {
					"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
					"fias": null
				}
			}
      
      



, @(1,index)



. , index . .. value addressRF, addressRF index.





, =



concat



, @(1,index)



.





fias - ein neues Element, das mit dem Wert null angelegt wird.





. shift .





. "*"



. , .. director , .





"director": {
				"*": {
					"fl": {
						"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
					}
				}
			}
      
      



shift

Vor der Operation shift sieht das JSON wie folgt aus.





JSON
{
  "reportDate" : "2020-05-20",
  "ogrn" : "1234567890123",
  "ogrnDate" : "2002-12-30",
  "inn" : "1234567890",
  "kpp" : "123456789",
  "opfCode" : "12300",
  "opfName" : "   ",
  "name" : {
    "fullName" : "   ",
    "shortName" : ""
  },
  "address" : {
    "addressRF" : {
      "region" : " ",
      "district" : " ",
      "town" : " ",
      "settlement" : " ",
      "street" : " ",
      "index" : "143500",
      "regionCode" : "50",
      "kladr" : "500000570000011",
      "house" : null,
      "building" : null,
      "apartment" : null,
      "value" : "143500,  ,  ,  ,  ,  , , , ",
      "fias" : null
    }
  },
  "termination" : null,
  "capital" : null,
  "manageOrg" : null,
  "director" : [ {
    "fl" : {
      "lastName" : "",
      "firstName" : "",
      "patronymic" : "",
      "inn" : "123456789012",
      "fio" : "  "
    },
    "position" : {
      "ogrnip" : null,
      "typeCode" : "02",
      "typeName" : "  ",
      "name" : " "
    },
    "disqualification" : null
  } ],
  "founders" : {
    "founderULRF" : null,
    "founderULForeign" : null,
    "founderFL" : [ {
      "fl" : {
        "lastName" : "",
        "firstName" : "",
        "patronymic" : "",
        "inn" : "123456789012",
        "fio" : "  "
      },
      "capitalPart" : {
        "nominal" : 20000,
        "size" : {
          "percent" : 50,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    }, {
      "fl" : {
        "lastName" : "",
        "firstName" : "",
        "patronymic" : "",
        "inn" : "123456789021",
        "fio" : "  "
      },
      "capitalPart" : {
        "nominal" : 20000,
        "size" : {
          "percent" : 50,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    } ],
    "founderGov" : null,
    "founderPIF" : null
  },
  "capitalPart" : null,
  "holderReestrAO" : null,
  "okved" : {
    "mainOkved" : {
      "code" : "47.11",
      "name" : "    ,  ,      "
    },
    "addOkved" : null
  }
}
      
      



, - , , , . , modify-overwrite-beta , . , shift - , .





shift
{
		"operation": "shift",
		"spec": {
			"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
			"name": {
				"*": "&"
			},
			"address": {
				"addressRF": {
					"kladr|regionCode|value|fias": "&2.&"
				}
			},
			"termination": {
				"method": {
					"*": "&2.&"
				},
				"*": "&1.&"
			},
			"capital": "&",
			"manageOrg": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio|inn": "&3[&2].&"
					},
					"position": {
						"name": "&3[&2].&1",
						"*": "&3[&2].&"
					},
					"disqualification": "&2[&1].&"
				}
			},
			"founders": {
				"founderULRF|founderULForeign": {
					"*": {
						"egrulData|foreignReg": {
							"*": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderFL": {
					"*": {
						"fl": {
							"fio|inn": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderGov": {
					"*": {
						"govOrg": {
							"*": "&4.&3[&2].&"
						},
						"capitalPart": "&3.&2[&1].&",
						"founderImplUL": {
							"egrulData": {
								"*": "&5.&4[&3].&2.&"
							}
						},
						"founderImplFL": {
							"fl": {
								"fio|inn": "&5.&4[&3].&2.&"
							}
						}
					}
				},
				"founderPIF": {
					"*": {
						"PIFName": {
							"name": "&4.&3[&2].&1"
						},
						"manageOrg": {
							"egrulData": {
								"*": "&5.&4[&3].&"
							}
						},
						"capitalPart": "&3.&2[&1].&"
					}
				}
			},
			"capitalPart": "&",
			"holderReestrAO": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"okved": "&"
		}
	}
      
      



shift . , , , . , . &



. , , &0



. . &1



, .. &



- , pre-&-post



. .. &



name, pre-name-post. . .





- "reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&"



. , . |



.





fullName shortName "name": { "*": "&" }



.

"*"



, , name



.

"&"



, .





- .





"address": {
				"addressRF": {
					"kladr|regionCode|value|fias": "&2.&"
				}
			}
      
      



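Das Ziel lautet hier "&2.&". Dabei ist &2 der Name des Elements zwei Ebenen höher, also address, und & der Name des Elements selbst. &1 wäre addressRF; diese Ebene wird ausgelassen. So entstehen die Elemente address.kladr, address.regionCode, address.value und address.fias. Alle übrigen Elemente von addressRF gelangen nicht in das resultierende JSON.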









"director" : [ {
    "fl" : {
      "lastName" : "",
      "firstName" : "",
      "patronymic" : "",
      "inn" : "123456789012",
      "fio" : "  "
    },
    "position" : {
      "ogrnip" : null,
      "typeCode" : "02",
      "typeName" : "  ",
      "name" : " "
    },
    "disqualification" : null
  } ]
      
      



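Die Elemente lastName, firstName und patronymic werden verworfen.

inn und fio werden eine Ebene nach oben verschoben.

ogrnip, typeCode und typeName werden eine Ebene nach oben verschoben.

name wird in position umbenannt.

disqualification bleibt unverändert.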





- , , - . , &



- [&]



.





"director": {
				"*": {
					"fl": {
						"fio|inn": "&3[&2].&"
					},
					"position": {
						"name": "&3[&2].&1",
						"*": "&3[&2].&"
					},
					"disqualification": "&2[&1].&"
				}
			}
      
      



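Betrachten wir zum Beispiel fio und inn mit dem Ziel &3[&2].& - dabei ist &3 der Name drei Ebenen höher, also director, [&2] der Index des aktuellen Array-Elements und & der Name des Elements selbst, also fio bzw. inn.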





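Für name in position lautet das Ziel &3[&2].&1 - dabei ist &3 der Name director, [&2] der Index des aktuellen Array-Elements und &1 der Name des übergeordneten Elements, also position. & wird hier nicht verwendet, daher erscheint der Wert von name unter dem Schlüssel position.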





Die übrigen Elemente von position werden einfach eine Ebene nach oben verschoben. Das Element disqualification bleibt unverändert.





Im Weiteren werden ähnliche Konstruktionen verwendet.





Beispiel

Zum Schluss führe ich das ursprüngliche JSON, die JOLT-Spezifikation und das resultierende JSON noch einmal vollständig auf.





Ursprüngliches JSON
{
  "reportDate": "2020-05-20",
  "ogrn": "1234567890123",
  "ogrnDate": "2002-12-30",
  "inn": "1234567890",
  "kpp": "123456789",
  "opfCode": "12300",
  "opfName": "   ",
  "name": {
    "fullName": "   ",
    "shortName": ""
  },
  "address": {
    "addressRF": {
      "region": {
        "type": "",
        "name": ""
      },
      "district": null,
      "town": {
        "type": "",
        "name": ""
      },
      "settlement": null,
      "street": {
        "type": "",
        "name": ""
      },
      "index": "143500",
      "regionCode": "50",
      "kladr": "500000570000011",
      "house": null,
      "building": null,
      "apartment": null
    }
  },
  "termination": null,
  "capital": null,
  "manageOrg": null,
  "director": [
    {
      "fl": {
        "lastName": "",
        "firstName": "",
        "patronymic": "",
        "inn": "123456789012"
      },
      "position": {
        "ogrnip": null,
        "typeCode": "02",
        "typeName": "  ",
        "name": " "
      },
      "disqualification": null
    }
  ],
  "founders": {
    "founderULRF": null,
    "founderULForeign": null,
    "founderFL": [
      {
        "fl": {
          "lastName": "",
          "firstName": "",
          "patronymic": "",
          "inn": "123456789012"
        },
        "capitalPart": {
          "nominal": 20000,
          "size": {
            "percent": 50,
            "decimalPart": null,
            "simplePart": null
          }
        }
      },
      {
        "fl": {
          "lastName": "",
          "firstName": "",
          "patronymic": "",
          "inn": "123456789021"
        },
        "capitalPart": {
          "nominal": 20000,
          "size": {
            "percent": 50,
            "decimalPart": null,
            "simplePart": null
          }
        }
      }
    ],
    "founderGov": null,
    "founderPIF": null
  },
  "capitalPart": null,
  "holderReestrAO": null,
  "okved": {
    "mainOkved": {
      "code": "47.11",
      "name": "    ,  ,      "
    },
    "addOkved": null
  }
}
      
      



JOLT-Spezifikation
[
	{
		"operation": "modify-overwrite-beta",
		"spec": {
			"address": {
				"addressRF": {
					"region": "=concat(@(type), ' ', @(name))",
					"district": "=concat(@(type), ' ', @(name))",
					"town": "=concat(@(type), ' ', @(name))",
					"settlement": "=concat(@(type), ' ', @(name))",
					"street": "=concat(@(type), ' ', @(name))"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
					}
				}
			},
			"founders": {
				"founderFL": {
					"*": {
						"fl": {
							"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
						}
					}
				},
				"founderGov": {
					"*": {
						"founderImplFL": {
							"fl": {
								"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
							}
						}
					}
				}
			}
		}
	},
	{
		"operation": "modify-overwrite-beta",
		"spec": {
			"address": {
				"addressRF": {
					"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
					"fias": null
				}
			}
		}
	},
	{
		"operation": "shift",
		"spec": {
			"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
			"name": {
				"*": "&"
			},
			"address": {
				"addressRF": {
					"kladr|regionCode|value|fias": "&2.&"
				}
			},
			"termination": {
				"method": {
					"*": "&2.&"
				},
				"*": "&1.&"
			},
			"capital": "&",
			"manageOrg": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio|inn": "&3[&2].&"
					},
					"position": {
						"name": "&3[&2].&1",
						"*": "&3[&2].&"
					},
					"disqualification": "&2[&1].&"
				}
			},
			"founders": {
				"founderULRF|founderULForeign": {
					"*": {
						"egrulData|foreignReg": {
							"*": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderFL": {
					"*": {
						"fl": {
							"fio|inn": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderGov": {
					"*": {
						"govOrg": {
							"*": "&4.&3[&2].&"
						},
						"capitalPart": "&3.&2[&1].&",
						"founderImplUL": {
							"egrulData": {
								"*": "&5.&4[&3].&2.&"
							}
						},
						"founderImplFL": {
							"fl": {
								"fio|inn": "&5.&4[&3].&2.&"
							}
						}
					}
				},
				"founderPIF": {
					"*": {
						"PIFName": {
							"name": "&4.&3[&2].&1"
						},
						"manageOrg": {
							"egrulData": {
								"*": "&5.&4[&3].&"
							}
						},
						"capitalPart": "&3.&2[&1].&"
					}
				}
			},
			"capitalPart": "&",
			"holderReestrAO": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"okved": "&"
		}
	}
]

      
      



Resultierendes JSON
{
  "reportDate" : "2020-05-20",
  "ogrn" : "1234567890123",
  "ogrnDate" : "2002-12-30",
  "inn" : "1234567890",
  "kpp" : "123456789",
  "opfCode" : "12300",
  "opfName" : "   ",
  "fullName" : "   ",
  "shortName" : "",
  "address" : {
    "kladr" : "500000570000011",
    "regionCode" : "50",
    "value" : "143500,  ,  ,  ,  ,  , , , ",
    "fias" : null
  },
  "capital" : null,
  "director" : [ {
    "fio" : "  ",
    "inn" : "123456789012",
    "ogrnip" : null,
    "typeCode" : "02",
    "typeName" : "  ",
    "position" : " ",
    "disqualification" : null
  } ],
  "founders" : {
    "founderFL" : [ {
      "fio" : "  ",
      "inn" : "123456789012",
      "capitalPart" : {
        "nominal" : 20000,
        "size" : {
          "percent" : 50,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    }, {
      "fio" : "  ",
      "inn" : "123456789021",
      "capitalPart" : {
        "nominal" : 20000,
        "size" : {
          "percent" : 50,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    } ]
  },
  "capitalPart" : null,
  "okved" : {
    "mainOkved" : {
      "code" : "47.11",
      "name" : "    ,  ,      "
    },
    "addOkved" : null
  }
}
      
      



Weiteres Vorgehen

Anschließend sollte das resultierende JSON zur Speicherung und weiteren Verwendung irgendwo abgelegt werden. Das geht jedoch über den Rahmen dieses Artikels hinaus - hier wählt jeder, was für ihn am bequemsten ist.








All Articles