Conference paper Open Access

Integrating Domain Terminology into Neural Machine Translation

Elise Michon; Josep Crego; Jean Senellart


MARC21 XML Export

<?xml version='1.0' encoding='UTF-8'?>
<record xmlns="http://www.loc.gov/MARC21/slim">
  <leader>00000nam##2200000uu#4500</leader>
  <datafield tag="041" ind1=" " ind2=" ">
    <subfield code="a">eng</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Neural Machine Translation</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Terminology</subfield>
  </datafield>
  <datafield tag="653" ind1=" " ind2=" ">
    <subfield code="a">Domain Adaptation</subfield>
  </datafield>
  <controlfield tag="005">20210214081214.0</controlfield>
  <controlfield tag="001">4537095</controlfield>
  <datafield tag="711" ind1=" " ind2=" ">
    <subfield code="d">8-13 December 2020</subfield>
    <subfield code="g">COLING'2020</subfield>
    <subfield code="a">International Conference on Computational Linguistics</subfield>
    <subfield code="c">Barcelona</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">SYSTRAN</subfield>
    <subfield code="a">Josep Crego</subfield>
  </datafield>
  <datafield tag="700" ind1=" " ind2=" ">
    <subfield code="u">SYSTRAN</subfield>
    <subfield code="a">Jean Senellart</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="s">268382</subfield>
    <subfield code="z">md5:fc8e91b7e16396d94491fe75542f0378</subfield>
    <subfield code="u">https://zenodo.org/record/4537095/files/2020.coling-main.348.pdf</subfield>
  </datafield>
  <datafield tag="542" ind1=" " ind2=" ">
    <subfield code="l">open</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="y">Conference website</subfield>
    <subfield code="u">https://coling2020.org</subfield>
  </datafield>
  <datafield tag="260" ind1=" " ind2=" ">
    <subfield code="c">2020-12-08</subfield>
  </datafield>
  <datafield tag="909" ind1="C" ind2="O">
    <subfield code="p">openaire</subfield>
    <subfield code="p">user-787061</subfield>
    <subfield code="o">oai:zenodo.org:4537095</subfield>
  </datafield>
  <datafield tag="100" ind1=" " ind2=" ">
    <subfield code="u">SYSTRAN</subfield>
    <subfield code="a">Elise Michon</subfield>
  </datafield>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">Integrating Domain Terminology into Neural Machine Translation</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">user-787061</subfield>
  </datafield>
  <datafield tag="536" ind1=" " ind2=" ">
    <subfield code="c">787061</subfield>
    <subfield code="a">Advanced tools for fighting oNline Illegal TrAfficking</subfield>
  </datafield>
  <datafield tag="540" ind1=" " ind2=" ">
    <subfield code="u">https://creativecommons.org/licenses/by/4.0/legalcode</subfield>
    <subfield code="a">Creative Commons Attribution 4.0 International</subfield>
  </datafield>
  <datafield tag="650" ind1="1" ind2="7">
    <subfield code="a">cc-by</subfield>
    <subfield code="2">opendefinition.org</subfield>
  </datafield>
  <datafield tag="520" ind1=" " ind2=" ">
    <subfield code="a">&lt;p&gt;This paper extends existing work on terminology integration into Neural Machine Translation, a common industrial practice to dynamically adapt translation to a specific domain. Our method, based on the use of placeholders complemented with morphosyntactic annotation, efficiently taps into the ability of the neural network to deal with symbolic knowledge to surpass the surface generalization shown by alternative techniques. We compare our approach to state-of-the-art systems and benchmark them through a well-defined evaluation framework, focusing on actual application of terminology and not just on the overall performance. Results indicate the suitability of our method in the use-case where terminology is used in a system trained on generic data only.&lt;/p&gt;</subfield>
  </datafield>
  <datafield tag="773" ind1=" " ind2=" ">
    <subfield code="n">doi</subfield>
    <subfield code="i">isDerivedFrom</subfield>
    <subfield code="a">10.18653/v1/2020.coling-main.348</subfield>
  </datafield>
  <datafield tag="024" ind1=" " ind2=" ">
    <subfield code="a">10.18653/v1/2020.coling-main.348</subfield>
    <subfield code="2">doi</subfield>
  </datafield>
  <datafield tag="980" ind1=" " ind2=" ">
    <subfield code="a">publication</subfield>
    <subfield code="b">conferencepaper</subfield>
  </datafield>
</record>
36
23
views
downloads
Views 36
Downloads 23
Data volume 6.2 MB
Unique views 25
Unique downloads 22

Share

Cite as