Dataset Open Access

Keyword counts from US Presidential State of the Union Addresses and Presidential Budget Messages

Jeremy Silver; Mark Quigley


DCAT Export

<?xml version='1.0' encoding='utf-8'?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:adms="http://www.w3.org/ns/adms#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dct="http://purl.org/dc/terms/" xmlns:dctype="http://purl.org/dc/dcmitype/" xmlns:dcat="http://www.w3.org/ns/dcat#" xmlns:duv="http://www.w3.org/ns/duv#" xmlns:foaf="http://xmlns.com/foaf/0.1/" xmlns:frapo="http://purl.org/cerif/frapo/" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:gsp="http://www.opengis.net/ont/geosparql#" xmlns:locn="http://www.w3.org/ns/locn#" xmlns:org="http://www.w3.org/ns/org#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:schema="http://schema.org/" xmlns:skos="http://www.w3.org/2004/02/skos/core#" xmlns:vcard="http://www.w3.org/2006/vcard/ns#" xmlns:wdrs="http://www.w3.org/2007/05/powder-s#">
  <rdf:Description rdf:about="https://doi.org/10.5281/zenodo.3250516">
    <!-- Core identity of the record: it is typed both as a DCAT Dataset and as a
         DCMI-type Dataset. -->
    <rdf:type rdf:resource="http://www.w3.org/ns/dcat#Dataset"/>
    <dct:type rdf:resource="http://purl.org/dc/dcmitype/Dataset"/>
    <!-- The DOI URL from the enclosing rdf:about, repeated as an xsd:anyURI literal
         and again as the landing page. -->
    <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://doi.org/10.5281/zenodo.3250516</dct:identifier>
    <foaf:page rdf:resource="https://doi.org/10.5281/zenodo.3250516"/>
    <!-- Creator: Jeremy Silver, a foaf:Agent identified by an ORCID URI.
         NOTE(review): the URI uses the http scheme; ORCID documents the https form
         as canonical. Changing it alters the resource identity, so confirm with
         consumers first. -->
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0003-1502-6249">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <!-- Bare ORCID (no URL prefix) as a plain string; it matches the tail of rdf:about. -->
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0003-1502-6249</dct:identifier>
        <foaf:name>Jeremy Silver</foaf:name>
        <!-- Affiliation is an anonymous organization node: it has a name but no URI. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Melbourne</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <!-- Creator: Mark Quigley, a foaf:Agent identified by an ORCID URI.
         NOTE(review): the URI uses the http scheme; ORCID documents the https form
         as canonical. Changing it alters the resource identity, so confirm with
         consumers first. -->
    <dct:creator>
      <rdf:Description rdf:about="http://orcid.org/0000-0002-4430-4212">
        <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Agent"/>
        <!-- Bare ORCID (no URL prefix) as a plain string; it matches the tail of rdf:about. -->
        <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0002-4430-4212</dct:identifier>
        <foaf:name>Mark Quigley</foaf:name>
        <!-- Affiliation is an anonymous organization node: it has a name but no URI. -->
        <org:memberOf>
          <foaf:Organization>
            <foaf:name>University of Melbourne</foaf:name>
          </foaf:Organization>
        </org:memberOf>
      </rdf:Description>
    </dct:creator>
    <!-- Descriptive metadata for the dataset: title, publisher, publication dates,
         keywords, language, alternate identifiers and version links. -->
    <dct:title>Keyword counts from US Presidential State of the Union Addresses and Presidential Budget Messages</dct:title>
    <!-- Publisher is an anonymous foaf:Agent known only by its name. -->
    <dct:publisher>
      <foaf:Agent>
        <foaf:name>Zenodo</foaf:name>
      </foaf:Agent>
    </dct:publisher>
    <!-- Publication year only (xsd:gYear); a full-date dct:issued follows the keywords. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#gYear">2019</dct:issued>
    <!-- Free-text keywords, one element per keyword. -->
    <dcat:keyword>US Politics</dcat:keyword>
    <dcat:keyword>Text processing</dcat:keyword>
    <dcat:keyword>Keyword counts</dcat:keyword>
    <!-- Full publication date (xsd:date).
         NOTE(review): this is a second dct:issued value next to the gYear one above;
         confirm that consumers accept two values for the same property. -->
    <dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2019-06-20</dct:issued>
    <dct:language rdf:resource="http://publications.europa.eu/resource/authority/language/ENG"/>
    <!-- The Zenodo record URL is declared to be the same resource as the DOI URL. -->
    <owl:sameAs rdf:resource="https://zenodo.org/record/3250516"/>
    <!-- The same record URL once more, as an ADMS identifier whose scheme agency
         is the plain literal "url". -->
    <adms:identifier>
      <adms:Identifier>
        <skos:notation rdf:datatype="http://www.w3.org/2001/XMLSchema#anyURI">https://zenodo.org/record/3250516</skos:notation>
        <adms:schemeAgency>url</adms:schemeAgency>
      </adms:Identifier>
    </adms:identifier>
    <!-- NOTE(review): presumably the DOI that covers all versions of this record
         (its number is one below this record's DOI); confirm. -->
    <dct:isVersionOf rdf:resource="https://doi.org/10.5281/zenodo.3250515"/>
    <owl:versionInfo>V1.0</owl:versionInfo>
    <dct:description>&lt;p&gt;Keyword counts from US Presidential State of the Union Addresses and Presidential Budget Messages. This was done using the Python scripts provided under&amp;nbsp;&lt;a href="https://github.com/JeremySilver/KeywordCountsPresidentialMessages"&gt;https://github.com/JeremySilver/KeywordCountsPresidentialMessages&lt;/a&gt;. The raw text data is from&amp;nbsp;&lt;a href="http://www.presidency.ucsb.edu/"&gt;The American Presidency Project&lt;/a&gt;&amp;nbsp;(&lt;a href="http://www.ucsb.edu/"&gt;UCSB&lt;/a&gt;), with some&amp;nbsp;Presidential Budget Messages being extracted from US Federal Budget documents available through&amp;nbsp;&lt;a href="https://fraser.stlouisfed.org/"&gt;FRASER&lt;/a&gt;&amp;nbsp;(a digital library of U.S. economic, financial, and banking history) or, for the more recent documents the website of the&amp;nbsp;&lt;a href="https://www.whitehouse.gov/"&gt;White House&lt;/a&gt;.&lt;/p&gt; &lt;p&gt;The data headings are:&lt;/p&gt; &lt;ul&gt; &lt;li&gt;pid: in most cases, this is the index for the text document as archived on&amp;nbsp;&lt;a href="http://www.presidency.ucsb.edu/"&gt;The American Presidency Project&lt;/a&gt;&amp;nbsp;website. In some cases, this was the filename of a plain-text file read directly.&lt;/li&gt; &lt;li&gt;year: Year that the message was delivered.&lt;/li&gt; &lt;li&gt;date: Date that the message was delivered.&lt;/li&gt; &lt;li&gt;name: Name of the US President delivering the message.&lt;/li&gt; &lt;li&gt;count_of_all_words: Count of all words in the document.&lt;/li&gt; &lt;li&gt;count_of_keywords: Count of all keywords encountered in that document.&lt;/li&gt; &lt;li&gt;Keyword specific columns - three per keyword. 
For example, for the &amp;#39;energy&amp;#39; keyword, the&amp;nbsp;&amp;#39;energy&amp;#39; column gives the number of times the &amp;#39;energy&amp;#39; keyword was counted in the message, &amp;#39;energy_pct_of_keywords&amp;#39; gives this count as a percentage of all keywords, and &amp;#39;energy_pct_of_all_words&amp;#39; gives this count as a percentage of all words&lt;/li&gt; &lt;/ul&gt; &lt;p&gt;Below is the list of keywords that match when the search is applied to a dictionary file containing over 99,000 US English words.&lt;/p&gt; &lt;ul&gt; &lt;li&gt;energy: &amp;#39;energy&amp;#39;&lt;/li&gt; &lt;li&gt;tax: &amp;#39;nontaxable&amp;#39;, &amp;#39;overtax&amp;#39;, &amp;#39;overtaxed&amp;#39;, &amp;#39;overtaxes&amp;#39;, &amp;#39;overtaxing&amp;#39;, &amp;#39;surtax&amp;#39;, &amp;#39;surtaxed&amp;#39;, &amp;#39;surtaxes&amp;#39;, &amp;#39;surtaxing&amp;#39;, &amp;#39;surtaxs&amp;#39;, &amp;#39;tax&amp;#39;, &amp;#39;taxable&amp;#39;, &amp;#39;taxation&amp;#39;, &amp;#39;taxations&amp;#39;, &amp;#39;taxed&amp;#39;, &amp;#39;taxes&amp;#39;, &amp;#39;taxing&amp;#39;, &amp;#39;taxpayer&amp;#39;, &amp;#39;taxpayers&amp;#39;, &amp;#39;taxs&amp;#39;&lt;/li&gt; &lt;li&gt;defense: &amp;#39;defend&amp;#39;, &amp;#39;defense&amp;#39;&lt;/li&gt; &lt;li&gt;education: &amp;#39;education&amp;#39;&lt;/li&gt; &lt;li&gt;employment: &amp;#39;employ&amp;#39;, &amp;#39;employable&amp;#39;, &amp;#39;employe&amp;#39;, &amp;#39;employed&amp;#39;, &amp;#39;employee&amp;#39;, &amp;#39;employees&amp;#39;, &amp;#39;employer&amp;#39;, &amp;#39;employers&amp;#39;, &amp;#39;employes&amp;#39;, &amp;#39;employing&amp;#39;, &amp;#39;employment&amp;#39;, &amp;#39;employments&amp;#39;, &amp;#39;employs&amp;#39;, &amp;#39;underemployed&amp;#39;, &amp;#39;unemployable&amp;#39;, &amp;#39;unemployed&amp;#39;, &amp;#39;unemployeds&amp;#39;, &amp;#39;unemployment&amp;#39;, &amp;#39;unemployments&amp;#39;&lt;/li&gt; &lt;li&gt;research: &amp;#39;research&amp;#39;, &amp;#39;researched&amp;#39;, 
&amp;#39;researcher&amp;#39;, &amp;#39;researchers&amp;#39;, &amp;#39;researches&amp;#39;, &amp;#39;researching&amp;#39;, &amp;#39;researchs&amp;#39;&lt;/li&gt; &lt;li&gt;shooting: &amp;#39;shooting&amp;#39;&lt;/li&gt; &lt;li&gt;space: &amp;#39;space&amp;#39;&lt;/li&gt; &lt;li&gt;nuclear: &amp;#39;nuclear&amp;#39;&lt;/li&gt; &lt;li&gt;natural&amp;nbsp;resources: &amp;#39;natural&amp;nbsp;resources&amp;#39;&lt;/li&gt; &lt;li&gt;racism: &amp;#39;racism&amp;#39;, &amp;#39;civil rights&amp;#39;&lt;/li&gt; &lt;li&gt;crime: &amp;#39;crime&amp;#39;, &amp;#39;crimes&amp;#39;, &amp;#39;criminal&amp;#39;, &amp;#39;criminally&amp;#39;, &amp;#39;criminals&amp;#39;, &amp;#39;decriminalization&amp;#39;, &amp;#39;decriminalizations&amp;#39;, &amp;#39;decriminalize&amp;#39;, &amp;#39;decriminalized&amp;#39;, &amp;#39;decriminalizes&amp;#39;, &amp;#39;decriminalizing&amp;#39;&lt;/li&gt; &lt;li&gt;environment: &amp;#39;environment&amp;#39;, &amp;#39;environmental&amp;#39;, &amp;#39;environmentalism&amp;#39;, &amp;#39;environmentalisms&amp;#39;, &amp;#39;environmentalist&amp;#39;, &amp;#39;environmentalists&amp;#39;, &amp;#39;environmentally&amp;#39;, &amp;#39;environments&amp;#39;&lt;/li&gt; &lt;li&gt;religion: &amp;#39;faith&amp;#39;, &amp;#39;god&amp;#39;, &amp;#39;prayer&amp;#39;, &amp;#39;religion&amp;#39;&lt;/li&gt; &lt;li&gt;health: &amp;#39;health&amp;#39;, &amp;#39;healthful&amp;#39;, &amp;#39;healthfully&amp;#39;, &amp;#39;healthfulness&amp;#39;, &amp;#39;healthfulnesss&amp;#39;, &amp;#39;healthier&amp;#39;, &amp;#39;healthiest&amp;#39;, &amp;#39;healthily&amp;#39;, &amp;#39;healthiness&amp;#39;, &amp;#39;healthinesss&amp;#39;, &amp;#39;healths&amp;#39;, &amp;#39;healthy&amp;#39;, &amp;#39;unhealthful&amp;#39;, &amp;#39;unhealthier&amp;#39;, &amp;#39;unhealthiest&amp;#39;, &amp;#39;unhealthy&amp;#39;&lt;/li&gt; &lt;li&gt;terror: &amp;#39;terror&amp;#39;, &amp;#39;terrorism&amp;#39;, &amp;#39;terrorisms&amp;#39;, &amp;#39;terrorist&amp;#39;, &amp;#39;terrorists&amp;#39;, 
&amp;#39;terrorize&amp;#39;, &amp;#39;terrorized&amp;#39;, &amp;#39;terrorizes&amp;#39;, &amp;#39;terrorizing&amp;#39;, &amp;#39;terrors&amp;#39;&lt;/li&gt; &lt;li&gt;war: &amp;#39;war&amp;#39;, &amp;#39;warrior&amp;#39;, &amp;#39;warriors&amp;#39;, &amp;#39;wars&amp;#39;&lt;/li&gt; &lt;li&gt;economy: &amp;#39;economic&amp;#39;, &amp;#39;economical&amp;#39;, &amp;#39;economically&amp;#39;, &amp;#39;economics&amp;#39;, &amp;#39;economicss&amp;#39;, &amp;#39;economy&amp;#39;, &amp;#39;economys&amp;#39;, &amp;#39;microeconomics&amp;#39;, &amp;#39;microeconomicss&amp;#39;, &amp;#39;socioeconomic&amp;#39;, &amp;#39;uneconomic&amp;#39;, &amp;#39;uneconomical&amp;#39;&lt;/li&gt; &lt;li&gt;jobs: &amp;#39;jobs&amp;#39;&lt;/li&gt; &lt;li&gt;business: &amp;#39;agribusiness&amp;#39;, &amp;#39;agribusinesses&amp;#39;, &amp;#39;agribusinesss&amp;#39;, &amp;#39;business&amp;#39;, &amp;#39;businesses&amp;#39;, &amp;#39;businesslike&amp;#39;, &amp;#39;businessman&amp;#39;, &amp;#39;businessmans&amp;#39;, &amp;#39;businessmen&amp;#39;, &amp;#39;businesss&amp;#39;, &amp;#39;businesswoman&amp;#39;, &amp;#39;businesswomans&amp;#39;, &amp;#39;businesswomen&amp;#39;&lt;/li&gt; &lt;li&gt;drugs: &amp;#39;drugs&amp;#39;, &amp;#39;narcotics&amp;#39;&lt;/li&gt; &lt;li&gt;inflation: &amp;#39;inflation&amp;#39;&lt;/li&gt; &lt;li&gt;climate: &amp;#39;climate&amp;#39;&lt;/li&gt; &lt;li&gt;science: &amp;#39;science&amp;#39;, &amp;#39;sciences&amp;#39;, &amp;#39;scientific&amp;#39;, &amp;#39;scientifically&amp;#39;, &amp;#39;scientist&amp;#39;, &amp;#39;scientists&amp;#39;&lt;/li&gt; &lt;li&gt;gun: &amp;#39;gun&amp;#39;, &amp;#39;gunfire&amp;#39;, &amp;#39;gunman&amp;#39;, &amp;#39;guns&amp;#39;, &amp;#39;handgun&amp;#39;, &amp;#39;rifle&amp;#39;, &amp;#39;shotgun&amp;#39;&lt;/li&gt; &lt;li&gt;tech: &amp;#39;biotechnology&amp;#39;, &amp;#39;biotechnologys&amp;#39;, &amp;#39;technical&amp;#39;, &amp;#39;technological&amp;#39;, &amp;#39;technologically&amp;#39;, &amp;#39;technologies&amp;#39;, 
&amp;#39;technologist&amp;#39;, &amp;#39;technologists&amp;#39;, &amp;#39;technology&amp;#39;, &amp;#39;technologys&amp;#39;&lt;/li&gt; &lt;li&gt;military: &amp;#39;military&amp;#39;&lt;/li&gt; &lt;li&gt;security: &amp;#39;security&amp;#39;&lt;/li&gt; &lt;li&gt;housing: &amp;#39;housing&amp;#39;&lt;/li&gt; &lt;li&gt;pollution: &amp;#39;pollution&amp;#39;&lt;/li&gt; &lt;/ul&gt; &lt;p&gt;The dictionary file used is a standard file among Linux systems, and the version used was provided with version 7.1-1 of the Ubuntu &amp;#39;wamerican&amp;#39; package.&amp;nbsp;Two extra phrases, which do not appear in the dictionary file, are added to the list: &amp;#39;civil rights&amp;#39; (under the &amp;#39;racism&amp;#39; keyword) and &amp;#39;natural&amp;nbsp;resources&amp;#39; (under the &amp;#39;natural&amp;nbsp;resources&amp;#39; theme).&lt;/p&gt;</dct:description>
    <!-- Access rights are stated twice. First as a reference to the PUBLIC term
         under the publications.europa.eu access-right authority URI. -->
    <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/PUBLIC"/>
    <!-- Second as an inline rights statement identified by the info:eu-repo
         openAccess URI, with a human-readable label. -->
    <dct:accessRights>
      <dct:RightsStatement rdf:about="info:eu-repo/semantics/openAccess">
        <rdfs:label>Open Access</rdfs:label>
      </dct:RightsStatement>
    </dct:accessRights>
    <!-- Anonymous distribution that carries the licence (Creative Commons BY 4.0
         legal code URL) and the DOI URL as access URL. It lists no download URL,
         byte size or media type. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dct:license rdf:resource="https://creativecommons.org/licenses/by/4.0/legalcode"/>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3250516"/>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Anonymous distribution for the file results_PBM.txt (presumably the
         Presidential Budget Messages results, going by the file name): access via
         the DOI URL, direct download from the Zenodo record, plain text.
         NOTE(review): byteSize is an untyped literal and mediaType is a plain string;
         the DCAT vocabulary describes byteSize as a typed numeric literal and
         mediaType as a media-type resource. Confirm what consumers expect before
         changing either. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3250516"/>
        <dcat:byteSize>40658</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3250516/files/results_PBM.txt"/>
        <dcat:mediaType>text/plain</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
    <!-- Anonymous distribution for the file results_SoU.txt (presumably the
         State of the Union results, going by the file name): access via the DOI
         URL, direct download from the Zenodo record, plain text.
         NOTE(review): byteSize is an untyped literal and mediaType is a plain string;
         the DCAT vocabulary describes byteSize as a typed numeric literal and
         mediaType as a media-type resource. Confirm what consumers expect before
         changing either. -->
    <dcat:distribution>
      <dcat:Distribution>
        <dcat:accessURL rdf:resource="https://doi.org/10.5281/zenodo.3250516"/>
        <dcat:byteSize>35875</dcat:byteSize>
        <dcat:downloadURL rdf:resource="https://zenodo.org/record/3250516/files/results_SoU.txt"/>
        <dcat:mediaType>text/plain</dcat:mediaType>
      </dcat:Distribution>
    </dcat:distribution>
  </rdf:Description>
</rdf:RDF>
134
29
views
downloads
All versions This version
Views 134 134
Downloads 29 29
Data volume 1.1 MB 1.1 MB
Unique views 99 99
Unique downloads 21 21

Share

Cite as