{
  "access": {
    "embargo": {
      "active": false,
      "reason": null
    },
    "files": "public",
    "record": "public",
    "status": "open"
  },
  "created": "2026-04-24T10:32:27.383436+00:00",
  "custom_fields": {
    "meeting:meeting": {
      "dates": "20 abril 2026",
      "place": "Madrid",
      "title": "Seminario IA y humanidades digitales para la prensa hist\u00f3rica",
      "url": "https://linhd.uned.es/seminario-ia-y-humanidades-digitales-para-la-prensa-historica/"
    }
  },
  "deletion_status": {
    "is_deleted": false,
    "status": "P"
  },
  "files": {
    "count": 1,
    "enabled": true,
    "entries": {
      "poster_academico_solr_dariah.pdf": {
        "access": {
          "hidden": false
        },
        "checksum": "md5:9c7c0b2c30532af9867ebadb26e1c5c2",
        "ext": "pdf",
        "id": "2139dc16-3a45-4009-9323-b7e7ab81c9af",
        "key": "poster_academico_solr_dariah.pdf",
        "links": {
          "content": "https://zenodo.org/api/records/19693510/files/poster_academico_solr_dariah.pdf/content",
          "iiif_api": "https://zenodo.org/api/iiif/record:19693510:poster_academico_solr_dariah.pdf/full/full/0/default.png",
          "iiif_base": "https://zenodo.org/api/iiif/record:19693510:poster_academico_solr_dariah.pdf",
          "iiif_canvas": "https://zenodo.org/api/iiif/record:19693510/canvas/poster_academico_solr_dariah.pdf",
          "iiif_info": "https://zenodo.org/api/iiif/record:19693510:poster_academico_solr_dariah.pdf/info.json",
          "self": "https://zenodo.org/api/records/19693510/files/poster_academico_solr_dariah.pdf"
        },
        "metadata": {
          "height": 3370,
          "width": 2384
        },
        "mimetype": "application/pdf",
        "size": 2361006,
        "storage_class": "L"
      }
    },
    "order": [],
    "total_bytes": 2361006
  },
  "id": "19693510",
  "is_draft": false,
  "is_published": true,
  "links": {
    "access": "https://zenodo.org/api/records/19693510/access",
    "access_grants": "https://zenodo.org/api/records/19693510/access/grants",
    "access_links": "https://zenodo.org/api/records/19693510/access/links",
    "access_request": "https://zenodo.org/api/records/19693510/access/request",
    "access_users": "https://zenodo.org/api/records/19693510/access/users",
    "archive": "https://zenodo.org/api/records/19693510/files-archive",
    "archive_media": "https://zenodo.org/api/records/19693510/media-files-archive",
    "communities": "https://zenodo.org/api/records/19693510/communities",
    "communities-suggestions": "https://zenodo.org/api/records/19693510/communities-suggestions",
    "doi": "https://doi.org/10.5281/zenodo.19693510",
    "draft": "https://zenodo.org/api/records/19693510/draft",
    "file_modification": "https://zenodo.org/api/records/19693510/file-modification",
    "files": "https://zenodo.org/api/records/19693510/files",
    "latest": "https://zenodo.org/api/records/19693510/versions/latest",
    "latest_html": "https://zenodo.org/records/19693510/latest",
    "media_files": "https://zenodo.org/api/records/19693510/media-files",
    "parent": "https://zenodo.org/api/records/19693509",
    "parent_doi": "https://doi.org/10.5281/zenodo.19693509",
    "parent_doi_html": "https://zenodo.org/doi/10.5281/zenodo.19693509",
    "parent_html": "https://zenodo.org/records/19693509",
    "preview_html": "https://zenodo.org/records/19693510?preview=1",
    "quota_increase": "https://zenodo.org/api/records/19693510/quota-increase",
    "request_deletion": "https://zenodo.org/api/records/19693510/request-deletion",
    "requests": "https://zenodo.org/api/records/19693510/requests",
    "reserve_doi": "https://zenodo.org/api/records/19693510/draft/pids/doi",
    "self": "https://zenodo.org/api/records/19693510",
    "self_doi": "https://doi.org/10.5281/zenodo.19693510",
    "self_doi_html": "https://zenodo.org/doi/10.5281/zenodo.19693510",
    "self_html": "https://zenodo.org/records/19693510",
    "self_iiif_manifest": "https://zenodo.org/api/iiif/record:19693510/manifest",
    "self_iiif_sequence": "https://zenodo.org/api/iiif/record:19693510/sequence/default",
    "thumbnails": {
      "10": "https://zenodo.org/api/iiif/record:19693510:poster_academico_solr_dariah.pdf/full/%5E10,/0/default.jpg",
      "100": "https://zenodo.org/api/iiif/record:19693510:poster_academico_solr_dariah.pdf/full/%5E100,/0/default.jpg",
      "1200": "https://zenodo.org/api/iiif/record:19693510:poster_academico_solr_dariah.pdf/full/%5E1200,/0/default.jpg",
      "250": "https://zenodo.org/api/iiif/record:19693510:poster_academico_solr_dariah.pdf/full/%5E250,/0/default.jpg",
      "50": "https://zenodo.org/api/iiif/record:19693510:poster_academico_solr_dariah.pdf/full/%5E50,/0/default.jpg",
      "750": "https://zenodo.org/api/iiif/record:19693510:poster_academico_solr_dariah.pdf/full/%5E750,/0/default.jpg"
    },
    "versions": "https://zenodo.org/api/records/19693510/versions"
  },
  "media_files": {
    "count": 1,
    "enabled": true,
    "entries": {
      "poster_academico_solr_dariah.pdf.ptif": {
        "access": {
          "hidden": true
        },
        "ext": "ptif",
        "id": "088bfb21-5596-4664-ae1e-ea1320a61d6f",
        "key": "poster_academico_solr_dariah.pdf.ptif",
        "links": {
          "content": "https://zenodo.org/api/records/19693510/files/poster_academico_solr_dariah.pdf.ptif/content",
          "self": "https://zenodo.org/api/records/19693510/files/poster_academico_solr_dariah.pdf.ptif"
        },
        "metadata": null,
        "mimetype": "application/octet-stream",
        "processor": {
          "source_file_id": "2139dc16-3a45-4009-9323-b7e7ab81c9af",
          "status": "finished",
          "type": "image-tiles"
        },
        "size": 0,
        "storage_class": "L"
      }
    },
    "order": [],
    "total_bytes": 0
  },
  "metadata": {
    "additional_titles": [
      {
        "lang": {
          "id": "eng",
          "title": {
            "en": "English"
          }
        },
        "title": "\"Reales Sitios\" with AI: Text Extraction, Embeddings, and Semantic Search with Apache SOLR",
        "type": {
          "id": "translated-title",
          "title": {
            "de": "\u00dcbersetzter Titel",
            "en": "Translated title"
          }
        }
      }
    ],
    "creators": [
      {
        "affiliations": [
          {
            "id": "03f6y4g19",
            "identifiers": [
              {
                "identifier": "03f6y4g19",
                "scheme": "ror"
              },
              {
                "identifier": "grid.494003.a",
                "scheme": "grid"
              },
              {
                "identifier": "0000 0001 2243 3876",
                "scheme": "isni"
              }
            ],
            "name": "Patrimonio Nacional"
          }
        ],
        "person_or_org": {
          "family_name": "Rodr\u00edguez G\u00f3mez",
          "given_name": "Jos\u00e9 Luis",
          "identifiers": [
            {
              "identifier": "0000-0003-0888-0084",
              "scheme": "orcid"
            }
          ],
          "name": "Rodr\u00edguez G\u00f3mez, Jos\u00e9 Luis",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "id": "02msb5n36",
            "identifiers": [
              {
                "identifier": "02msb5n36",
                "scheme": "ror"
              },
              {
                "identifier": "grid.10702.34",
                "scheme": "grid"
              },
              {
                "identifier": "0000 0001 2308 8920",
                "scheme": "isni"
              }
            ],
            "name": "National University of Distance Education"
          }
        ],
        "person_or_org": {
          "family_name": "Ros",
          "given_name": "Salvador",
          "identifiers": [
            {
              "identifier": "0000-0001-6330-4958",
              "scheme": "orcid"
            }
          ],
          "name": "Ros, Salvador",
          "type": "personal"
        }
      },
      {
        "affiliations": [
          {
            "id": "03f6y4g19",
            "identifiers": [
              {
                "identifier": "03f6y4g19",
                "scheme": "ror"
              },
              {
                "identifier": "grid.494003.a",
                "scheme": "grid"
              },
              {
                "identifier": "0000 0001 2243 3876",
                "scheme": "isni"
              }
            ],
            "name": "Patrimonio Nacional"
          }
        ],
        "person_or_org": {
          "family_name": "Nicl\u00f3s Ferreras",
          "given_name": "Ester",
          "name": "Nicl\u00f3s Ferreras, Ester",
          "type": "personal"
        }
      }
    ],
    "dates": [
      {
        "date": "2026-04-01",
        "description": "DARIAH UNED",
        "type": {
          "id": "accepted",
          "title": {
            "de": "Angenommen",
            "en": "Accepted"
          }
        }
      }
    ],
    "description": "<p>La revista <em>Reales Sitios</em>, objeto de esta propuesta, nace en 1964 como apoyo a la investigaci&oacute;n y divulgaci&oacute;n de los bienes muse&iacute;sticos de&nbsp;Patrimonio Nacional. En su redacci&oacute;n han tenido un papel fundamental los conservadores y restauradores del organismo, en su mayor&iacute;a&nbsp;historiadores del arte especializados en los periodos hist&oacute;ricos y en las colecciones a su cargo, junto con especialistas del &aacute;mbito acad&eacute;mico&nbsp;con l&iacute;neas de investigaci&oacute;n abiertas sobre estos fondos.</p>\n<p><br>Esta publicaci&oacute;n se ha difundido hasta el presente en soporte papel y su presencia en las bibliotecas universitarias no est&aacute; generalizada, por lo&nbsp;que uno de los recursos m&aacute;s habituales para su consulta ha sido la visita f&iacute;sica a la Real Biblioteca de Palacio, que conserva varios juegos&nbsp;completos de esta publicaci&oacute;n peri&oacute;dica. Para solventar esta barrera, y dar respuesta al continuo inter&eacute;s acad&eacute;mico y cient&iacute;fico que suscitan sus&nbsp;contenidos, recientemente se ha iniciado un plan de digitalizaci&oacute;n que culminar&aacute; con la publicaci&oacute;n en internet de la revista completa a trav&eacute;s&nbsp;de la plataforma Open Journal System y Dspace, sistema este &uacute;ltimo que da soporte al Repositorio Institucional de Objetos Digitales.</p>\n<p><br>La presente propuesta de proyecto toma como materiales una muestra representativa de los vol&uacute;menes digitalizados Para la obtenci&oacute;n de los&nbsp;metadatos se parte de la instalaci&oacute;n de Open Journal System, a la que se ha incorporado ya buena parte de los n&uacute;meros de la revista. Respecto&nbsp;a la extracci&oacute;n del texto de los art&iacute;culos, se examinan distintas alternativas para optimizar el proceso, obstaculizado por el dise&ntilde;o de revista&nbsp;(im&aacute;genes, columnas, publicidad, curvaturas, orientaci&oacute;n, etc.), siempre apoy&aacute;ndose en librer&iacute;as de Python, basadas en Deep Learning y<br>Transformers.</p>\n<p><br>Con este punto de partida -- subconjunto de metadatos extra&iacute;dos de OJS, por una parte, y el texto de calidad finalmente obtenido, por otra-- se&nbsp;desarrollar&aacute; una propuesta de b&uacute;squeda sem&aacute;ntica sobre este corpus, que constituye la finalidad del proyecto. Para ello, se utilizar&aacute; la &uacute;ltima&nbsp;versi&oacute;n de Apache SOLR, que soporta la indexaci&oacute;n con embeddings, para cuya generaci&oacute;n y carga en SOLR se utilizar&aacute; un script de Python con&nbsp;las librer&iacute;as openai y request, y, experimentalmente, aplicaremos tambi&eacute;n el m&oacute;dulo Text to Vector de SOLR.</p>\n<p><br>Por lo que respecta a la recuperaci&oacute;n de informaci&oacute;n basada en vectores, se utilizar&aacute; el algoritmo HNSW (Hierarchical Navigable Small World&nbsp;Graph), que es el que incorpora SOLR. El modelo de recuperaci&oacute;n ser&aacute; h&iacute;brido, ya que hemos mantenido tambi&eacute;n un campo textual que incluye&nbsp;el texto del art&iacute;culo. Por &uacute;ltimo, esta configuraci&oacute;n de SOLR se integrar&aacute; en los actuales sistemas de consulta y preservaci&oacute;n digital de&nbsp;Patrimonio Nacional, en concreto, OJS y el Repertorio Institucional de Objetos Digitales, en Dspace 9.&nbsp;En el p&oacute;ster se puede seguir el flujo de procesos y las herramientas utilizada, la configuraci&oacute;n de SOLR para la indexaci&oacute;n de Reales Sitios, un&nbsp;caso de b&uacute;squeda sem&aacute;ntica, y los pasos siguientes relativos a la integraci&oacute;n del motor de b&uacute;squedas en los sistemas OJS y Dspace.</p>",
    "languages": [
      {
        "id": "spa",
        "title": {
          "en": "Spanish"
        }
      }
    ],
    "publication_date": "2026-04-22",
    "publisher": "Zenodo",
    "resource_type": {
      "id": "poster",
      "title": {
        "de": "Poster",
        "en": "Poster"
      }
    },
    "rights": [
      {
        "description": {
          "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited."
        },
        "icon": "cc-by-icon",
        "id": "cc-by-4.0",
        "props": {
          "scheme": "spdx",
          "url": "https://creativecommons.org/licenses/by/4.0/legalcode"
        },
        "title": {
          "en": "Creative Commons Attribution 4.0 International"
        }
      }
    ],
    "subjects": [
      {
        "subject": "semantic search"
      },
      {
        "subject": "apache solr"
      },
      {
        "id": "mesh:D009144",
        "scheme": "MeSH",
        "subject": "Museums"
      }
    ],
    "title": "Reales Sitios con IA: Extracci\u00f3n de texto, embeddings y b\u00fasqueda sem\u00e1ntica con Apache SOLR"
  },
  "parent": {
    "access": {
      "owned_by": {
        "user": "1620326"
      },
      "settings": {
        "accept_conditions_text": null,
        "allow_guest_requests": false,
        "allow_user_requests": false,
        "secret_link_expiration": 0
      }
    },
    "communities": {
      "default": "26b93239-9ce1-400d-badc-b0a880c05af9",
      "entries": [
        {
          "access": {
            "member_policy": "open",
            "members_visibility": "public",
            "record_submission_policy": "open",
            "review_policy": "closed",
            "visibility": "public"
          },
          "children": {
            "allow": false
          },
          "created": "2025-10-22T07:40:02.972618+00:00",
          "custom_fields": {},
          "deletion_status": {
            "is_deleted": false,
            "status": "P"
          },
          "id": "26b93239-9ce1-400d-badc-b0a880c05af9",
          "links": {},
          "metadata": {
            "description": "A multidisciplinary group of experts in social sciences, arts, and humanities, artificial intelligence, language technologies, computer science, and high-performance computing. Its mission is to coordinate Spain's participation in CLARIN and DARIAH",
            "organizations": [
              {
                "id": "000xsnr85"
              },
              {
                "id": "02p0gd045"
              },
              {
                "id": "02msb5n36"
              },
              {
                "id": "05t8bcz72"
              },
              {
                "id": "0122p5f64"
              },
              {
                "id": "030eybx10"
              },
              {
                "id": "02cr20t12"
              },
              {
                "id": "edmo:1672"
              },
              {
                "id": "05sd8tv96"
              },
              {
                "id": "01teme464"
              },
              {
                "name": "Fundaci\u00f3n Dialnet"
              },
              {
                "id": "04s33wy62"
              }
            ],
            "title": "CLARIAH-ES",
            "type": {
              "id": "organization"
            },
            "website": "https://www.clariah.es/es"
          },
          "revision_id": 5,
          "slug": "clariah-es",
          "updated": "2025-10-22T07:57:15.033784+00:00"
        }
      ],
      "ids": [
        "26b93239-9ce1-400d-badc-b0a880c05af9"
      ]
    },
    "id": "19693509",
    "pids": {
      "doi": {
        "client": "datacite",
        "identifier": "10.5281/zenodo.19693509",
        "provider": "datacite"
      }
    }
  },
  "pids": {
    "doi": {
      "client": "datacite",
      "identifier": "10.5281/zenodo.19693510",
      "provider": "datacite"
    },
    "oai": {
      "identifier": "oai:zenodo.org:19693510",
      "provider": "oai"
    }
  },
  "revision_id": 6,
  "stats": {
    "all_versions": {
      "data_volume": 174714444.0,
      "downloads": 74,
      "unique_downloads": 48,
      "unique_views": 32,
      "views": 44
    },
    "this_version": {
      "data_volume": 174714444.0,
      "downloads": 74,
      "unique_downloads": 48,
      "unique_views": 32,
      "views": 44
    }
  },
  "status": "published",
  "swh": {},
  "updated": "2026-04-24T10:52:57.168861+00:00",
  "versions": {
    "index": 1,
    "is_latest": true
  }
}