import urllib.request, json
import requests
import pandas as pd
import xml.etree.ElementTree as ET
liquidacion_trimestral_uri = 'https://236ws.dpteruel.es/transparencia/dpteruel/wp-content/uploads/sites/3/2021/05/P4400000H-2021-LIQUIDACION-TRIMESTRAL-PRESUPUESTOS-1T.xml'
ns = {
'': 'http://www.xbrl.org/2003/instance',
'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
'trimloc2017': 'http://www.meh.es/taxonomias/trimloc2017',
'trimloc2017-econ-ingr': 'http://www.meh.es/taxonomias/trimloc2017-econ-ingr',
'trimloc2017-resultp': 'http://www.meh.es/taxonomias/trimloc2017-resultp',
'trimloc2017-exist': 'http://www.meh.es/taxonomias/trimloc2017-exist',
'xbrldt': 'http://xbrl.org/2005/xbrldt',
'trimloc2017-ref': 'http://www.meh.es/taxonomias/trimloc2017-ref',
'trimloc2017-econ-gast-cuentas': 'http://www.meh.es/taxonomias/trimloc2017-econ-gast-cuentas',
'trimloc2017-prog-cuentas-econ-cruzadas': 'http://www.meh.es/taxonomias/trimloc2017-prog-cuentas-econ-cruzadas',
'link': 'http://www.xbrl.org/2003/linkbase',
'trimloc2017-econ-ingr-cuentas': 'http://www.meh.es/taxonomias/trimloc2017-econ-ingr-cuentas',
'trimloc2017-econ-gast': 'http://www.meh.es/taxonomias/trimloc2017-econ-gast',
'xlink': 'http://www.w3.org/1999/xlink',
'trimloc2017-prog': 'http://www.meh.es/taxonomias/trimloc2017-prog',
'iso4217': 'http://www.xbrl.org/2003/iso4217',
'trimloc2017-prog-cuentas': 'http://www.meh.es/taxonomias/trimloc2017-prog-cuentas',
'trimloc2017-anexo5': 'http://www.meh.es/taxonomias/trimloc2017-anexo5',
'trimloc2017-remt': 'http://www.meh.es/taxonomias/trimloc2017-remt',
'trimloc2017-anexo4': 'http://www.meh.es/taxonomias/trimloc2017-anexo4',
'trimloc2017-anexo3': 'http://www.meh.es/taxonomias/trimloc2017-anexo3',
'xbrldi': 'http://xbrl.org/2006/xbrldi',
'trimloc2017-anexo2': 'http://www.meh.es/taxonomias/trimloc2017-anexo2',
'trimloc2017-econ-ingr-importes': 'http://www.meh.es/taxonomias/trimloc2017-econ-ingr-importes',
'trimloc2017-econ-gast-importes': 'http://www.meh.es/taxonomias/trimloc2017-econ-gast-importes',
'trimloc2017-anexo1': 'http://www.meh.es/taxonomias/trimloc2017-anexo1',
'xbrli': 'http://www.xbrl.org/2003/instance'
}
r = requests.get(liquidacion_trimestral_uri)
tree = ET.ElementTree(ET.fromstring(r.text))
root = tree.getroot()
def format_explicit_member(explicit_member):
if explicit_member is not None:
return explicit_member.text, explicit_member.attrib['dimension']
return None, None
def format_typed_member(typed_member):
if typed_member is not None:
dimension = typed_member.attrib['dimension']
typed_member_value = typed_member[0].text
typed_member = typed_member.items()[0][1]
return typed_member, typed_member_value, dimension
else:
return None, None, None
def check_dimensions(explicit_dimension, typed_dimensions):
if explicit_dimension is not None:
return explicit_dimension
elif typed_dimension is not None:
return typed_dimension
contexts = root.findall("./context", ns)
entities_df = pd.DataFrame(columns=['id', 'entity', 'period', 'explicit_member', 'typed_member'])
for c in contexts:
identifier = c.attrib['id']
entity = c.find("./entity/identifier", ns).text
period = c.find("./period/instant", ns).text
explicit_member = c.find("./entity/segment/xbrldi:explicitMember", ns)
explicit_member, explicit_dimension = format_explicit_member(explicit_member)
typed_member = c.find("./entity/segment/xbrldi:typedMember", ns)
typed_member, typed_member_value, typed_dimension = format_typed_member(typed_member)
dimension = check_dimensions(explicit_dimension, typed_dimension)
entities_df = entities_df.append({
'id': identifier,
'entity': entity,
'period': period,
'explicit_member': explicit_member,
'typed_member': typed_member,
'typed_member_value': typed_member_value,
'dimension': dimension,
}, ignore_index=True)
entities_df = entities_df.set_index('id')
entities_df.shape
(1049, 6)
entities_df.describe()
entity | period | explicit_member | typed_member | dimension | typed_member_value | |
---|---|---|---|---|---|---|
count | 1049 | 1049 | 738 | 310 | 1048 | 310 |
unique | 1 | 1 | 738 | 5 | 10 | 303 |
top | EXCMA.DIPUTACION PROVINCIAL TERUEL | 2021-03-31 | trimloc2017-econ-ingr-cuentas:EconomicaIngresos | trimloc2017-anexo5:Filasdimension | trimloc2017-econ-gast:Cuentasdimension | Primer aval |
freq | 1049 | 1049 | 1 | 300 | 372 | 4 |
entities_df.head()
entity | period | explicit_member | typed_member | dimension | typed_member_value | |
---|---|---|---|---|---|---|
id | ||||||
IdContextoDatosBasicos | EXCMA.DIPUTACION PROVINCIAL TERUEL | 2021-03-31 | None | None | None | None |
IdContextosEconomicaIngresos_EconomicaIngresos | EXCMA.DIPUTACION PROVINCIAL TERUEL | 2021-03-31 | trimloc2017-econ-ingr-cuentas:EconomicaIngresos | None | trimloc2017-econ-ingr:Cuentasdimension | None |
IdContextosEconomicaIngresos_ImpuestosDirectos | EXCMA.DIPUTACION PROVINCIAL TERUEL | 2021-03-31 | trimloc2017-econ-ingr-cuentas:ImpuestosDirectos | None | trimloc2017-econ-ingr:Cuentasdimension | None |
IdContextosEconomicaIngresos_ImpuestosIndirectos | EXCMA.DIPUTACION PROVINCIAL TERUEL | 2021-03-31 | trimloc2017-econ-ingr-cuentas:ImpuestosIndirectos | None | trimloc2017-econ-ingr:Cuentasdimension | None |
IdContextosEconomicaIngresos_TasasPreciosPublicosYOtrosIngresos | EXCMA.DIPUTACION PROVINCIAL TERUEL | 2021-03-31 | trimloc2017-econ-ingr-cuentas:TasasPreciosPubl... | None | trimloc2017-econ-ingr:Cuentasdimension | None |
entities_df.to_csv('./input/entities.csv')