Conference paper Open Access

The SSIX Corpora: Three Gold Standard Corpora for Sentiment Analysis in English, Spanish and German Financial Microblogs

Gaillat, Thomas; Zarrouk, Manel; Freitas, André; Davis, Brian


MARC21 XML Export

<?xml version='1.0' encoding='UTF-8'?>
<record xmlns="http://www.loc.gov/MARC21/slim">
  <leader>00000nam##2200000uu#4500</leader>
  <datafield tag="041" ind1=" " ind2=" ">
    <subfield code="a">eng</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Sentiment Analysis</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Opinion</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Corpus</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Finance</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Stock-market</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Microblogs</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Polarity</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Annotation</subfield>
  </datafield>
  <controlfield tag="005">20200120173306.0</controlfield>
  <controlfield tag="001">1247055</controlfield>
  <datafield tag="711" ind1=" " ind2=" ">
    <subfield code="d">7-12 May 2018</subfield>
    <subfield code="g">LREC 2018</subfield>
    <subfield code="a">Eleventh International Conference on Language Resources and Evaluation</subfield>
    <subfield code="c">Miyazaki, Japan</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Insight Centre for Data Analytics NUIG, Galway Ireland</subfield>
    <subfield code="a">Zarrouk, Manel</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">University of Passau, Passau Germany</subfield>
    <subfield code="a">Freitas, André</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">Maynooth University, Maynooth Ireland</subfield>
    <subfield code="a">Davis, Brian</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">146285</subfield>
    <subfield code="z">md5:1928b6cc6e79af73b8bc2aabe3e49710</subfield>
    <subfield code="u">https://zenodo.org/record/1247055/files/564.pdf</subfield>
  </datafield>
  <datafield tag="542" ind1=" " ind2=" ">
    <subfield code="l">open</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="y">Conference website</subfield>
    <subfield code="u">http://www.lrec-conf.org/proceedings/lrec2018/index.html</subfield>
  </datafield>
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2018-05-12</subfield>
  </datafield>
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="p">openaire</subfield>
    <subfield code="o">oai:zenodo.org:1247055</subfield>
  </datafield>
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="u">Insight Centre for Data Analytics NUIG, Galway Ireland</subfield>
    <subfield code="a">Gaillat, Thomas</subfield>
  </datafield>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">The SSIX Corpora: Three Gold Standard Corpora for Sentiment Analysis in English, Spanish and German Financial Microblogs</subfield>
  </datafield>
  <datafield tag="536" ind1=" " ind2=" ">
    <subfield code="c">645425</subfield>
    <subfield code="a">Social Sentiment analysis financial IndeXes</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="u">https://creativecommons.org/licenses/by-nc/4.0/legalcode</subfield>
    <subfield code="a">Creative Commons Attribution Non Commercial 4.0 International</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
    <subfield code="a">cc-by</subfield>
    <subfield code="2">opendefinition.org</subfield>
  </datafield>
  <datafield tag="520" ind1=" " ind2=" ">
    <subfield code="a">&lt;p&gt;This paper introduces the three SSIX corpora for sentiment analysis. These corpora address the need to provide annotated data for supervised learning methods. They focus on stock-market related messages extracted from two financial microblog platforms, i.e., StockTwits and Twitter. In total, they include 2,886 messages with opinion targets. These messages are provided with polarity annotation set on a continuous scale by three or four experts in each language. The annotation information identifies the targets with a sentiment score. The annotation process includes manual annotation verified and consolidated by financial experts. The creation of the annotated corpora took into account principled sampling strategies as well as inter-annotator agreement before consolidation in order to maximize data quality.&lt;/p&gt;</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">isbn</subfield>
    <subfield code="i">isPartOf</subfield>
    <subfield code="a">979-10-95546-00-9</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">isVersionOf</subfield>
    <subfield code="a">10.5281/zenodo.1247054</subfield>
  </datafield>
  <datafield tag="024" ind1=" " ind2=" ">
    <subfield code="a">10.5281/zenodo.1247055</subfield>
    <subfield code="2">doi</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">publication</subfield>
    <subfield code="b">conferencepaper</subfield>
  </datafield>
</record>
199
83
views
downloads
All versions This version
Views 199200
Downloads 8383
Data volume 12.1 MB12.1 MB
Unique views 186187
Unique downloads 7676

Share

Cite as