<?xml version="1.0" encoding="utf-8"?><!DOCTYPE article  PUBLIC '-//OASIS//DTD DocBook XML V4.4//EN'  'http://www.docbook.org/xml/4.4/docbookx.dtd'><article><articleinfo><title>About</title><revhistory><revision><revnumber>93</revnumber><date>2016-10-06 18:42:54</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>92</revnumber><date>2016-08-12 11:30:43</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>91</revnumber><date>2015-03-30 10:28:33</date><authorinitials>MateuszKopec</authorinitials></revision><revision><revnumber>90</revnumber><date>2015-03-30 10:25:57</date><authorinitials>MateuszKopec</authorinitials></revision><revision><revnumber>89</revnumber><date>2015-03-30 10:24:54</date><authorinitials>MateuszKopec</authorinitials></revision><revision><revnumber>88</revnumber><date>2015-03-30 10:24:25</date><authorinitials>MateuszKopec</authorinitials></revision><revision><revnumber>87</revnumber><date>2015-01-14 16:03:30</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>86</revnumber><date>2015-01-14 16:03:19</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>85</revnumber><date>2015-01-14 16:03:10</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>84</revnumber><date>2015-01-14 16:02:46</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>83</revnumber><date>2015-01-14 16:01:39</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>82</revnumber><date>2015-01-14 16:00:57</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>81</revnumber><date>2015-01-14 15:59:22</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>80</revnumber><date>2015-01-14 
15:57:55</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>79</revnumber><date>2015-01-14 15:57:14</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>78</revnumber><date>2015-01-14 15:56:57</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>77</revnumber><date>2015-01-14 15:56:44</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>76</revnumber><date>2015-01-14 15:56:33</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>75</revnumber><date>2015-01-11 00:30:35</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>74</revnumber><date>2014-12-18 19:36:05</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>73</revnumber><date>2014-08-21 06:24:52</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>72</revnumber><date>2014-08-20 13:14:16</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>71</revnumber><date>2014-08-20 12:19:04</date><authorinitials>MichalLenart</authorinitials></revision><revision><revnumber>70</revnumber><date>2014-08-20 12:18:54</date><authorinitials>MichalLenart</authorinitials></revision><revision><revnumber>69</revnumber><date>2014-08-20 12:13:28</date><authorinitials>MichalLenart</authorinitials></revision><revision><revnumber>68</revnumber><date>2014-08-20 12:13:13</date><authorinitials>MichalLenart</authorinitials></revision><revision><revnumber>67</revnumber><date>2014-08-20 12:12:25</date><authorinitials>MichalLenart</authorinitials></revision><revision><revnumber>66</revnumber><date>2014-08-20 12:00:05</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>65</revnumber><date>2014-08-20 
11:59:54</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>64</revnumber><date>2014-08-20 11:59:00</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>63</revnumber><date>2014-08-20 11:57:49</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>62</revnumber><date>2014-08-20 11:57:39</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>61</revnumber><date>2014-08-20 11:55:51</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>60</revnumber><date>2014-08-20 11:54:36</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>59</revnumber><date>2014-08-20 11:52:21</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>58</revnumber><date>2014-08-20 11:52:05</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>57</revnumber><date>2014-08-20 11:47:49</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>56</revnumber><date>2014-08-20 11:43:10</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>55</revnumber><date>2014-08-20 11:42:28</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>54</revnumber><date>2014-08-20 11:36:45</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>53</revnumber><date>2014-08-20 11:21:51</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>52</revnumber><date>2014-08-19 15:43:56</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>51</revnumber><date>2014-08-19 15:06:01</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>50</revnumber><date>2014-08-19 
15:04:53</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>49</revnumber><date>2014-08-19 15:00:38</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>48</revnumber><date>2014-08-19 15:00:00</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>47</revnumber><date>2014-08-19 14:37:47</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>46</revnumber><date>2014-08-19 13:15:17</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>45</revnumber><date>2014-08-19 12:59:31</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>44</revnumber><date>2014-08-19 12:58:25</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>43</revnumber><date>2014-08-19 12:55:59</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>42</revnumber><date>2014-08-19 12:55:49</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>41</revnumber><date>2014-08-19 12:53:21</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>40</revnumber><date>2014-08-19 12:53:08</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>39</revnumber><date>2014-08-19 12:52:59</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>38</revnumber><date>2014-08-19 12:52:31</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>37</revnumber><date>2014-08-19 12:48:53</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>36</revnumber><date>2014-08-19 12:48:27</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>35</revnumber><date>2014-08-19 
12:48:02</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>34</revnumber><date>2014-08-19 12:47:25</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>33</revnumber><date>2014-08-19 12:47:16</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>32</revnumber><date>2014-08-19 12:47:08</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>31</revnumber><date>2014-08-19 12:46:59</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>30</revnumber><date>2014-08-19 12:46:49</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>29</revnumber><date>2014-08-19 12:46:40</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>28</revnumber><date>2014-08-19 12:46:31</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>27</revnumber><date>2014-08-19 12:45:51</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>26</revnumber><date>2014-08-19 12:45:44</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>25</revnumber><date>2014-08-19 12:45:33</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>24</revnumber><date>2014-08-19 12:44:40</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>23</revnumber><date>2014-08-19 12:44:09</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>22</revnumber><date>2014-08-19 12:24:46</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>21</revnumber><date>2014-08-19 12:24:33</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>20</revnumber><date>2014-08-19 
12:23:00</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>19</revnumber><date>2014-08-19 12:22:00</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>18</revnumber><date>2014-08-19 12:21:49</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>17</revnumber><date>2014-08-19 12:20:47</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>16</revnumber><date>2014-08-19 12:19:00</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>15</revnumber><date>2014-08-19 12:18:41</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>14</revnumber><date>2014-08-19 12:18:35</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>13</revnumber><date>2014-08-19 12:18:29</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>12</revnumber><date>2014-08-19 12:18:23</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>11</revnumber><date>2014-08-19 12:18:14</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>10</revnumber><date>2014-08-19 12:15:44</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>9</revnumber><date>2014-08-19 12:15:37</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>8</revnumber><date>2014-08-19 12:15:29</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>7</revnumber><date>2014-08-19 12:15:00</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>6</revnumber><date>2014-08-19 11:15:18</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>5</revnumber><date>2014-08-19 
11:14:53</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>4</revnumber><date>2014-08-19 11:04:48</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>3</revnumber><date>2014-08-19 11:04:23</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>2</revnumber><date>2014-08-19 11:03:42</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>1</revnumber><date>2014-08-19 11:03:29</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision></revhistory></articleinfo><section><title>The CORE project</title><para>The work reported here was carried out within the <emphasis>Computer-based methods for coreference resolution in Polish texts</emphasis> (PL: <emphasis>Komputerowe metody identyfikacji nawiązań w tekstach polskich</emphasis>) project financed by the Polish <ulink url="http://www.ncn.gov.pl/?language=en">National Science Centre</ulink> (contract number 6505/B/T02/2011/40) and carried out between April 2011 and July 2014 at the <ulink url="http://www2.ipipan.waw.pl/index.php/en/">Institute of Computer Science, Polish Academy of Sciences</ulink>. It was targeted at the creation of innovative methods and tools for automated coreference resolution in Polish, with planned quality compared to state-of-the-art tools available for other languages. 
</para><section><title>Project results</title><itemizedlist><listitem><para><ulink url="http://zil.ipipan.waw.pl/PolishCoreferenceCorpus">Polish Coreference Corpus</ulink> </para></listitem><listitem><para><ulink url="http://zil.ipipan.waw.pl/PolishCoreferenceTools">Polish Coreference Tools</ulink> </para></listitem></itemizedlist></section><section><title>Project book</title><para><ulink url="http://www.degruyter.com/view/product/428667">Ogrodniczuk, Głowińska, Kopeć, Savary, Zawisławska – Coreference: Annotation, Resolution and Evaluation in Polish</ulink>  </para><para>A monograph published by Walter de Gruyter. The book presents work on coreference understanding, annotation and resolution of a Slavic language which can be applied to natural language processing in computers and software using English and other languages. The book presents specificities of reference, anaphora and coreference in Polish, establishes an identity-of-reference annotation model and presents the methodology used to create the corpus of Polish general nominal coreference. Various resolution approaches are presented, followed by their evaluation. By presenting the subsequent steps of building a coreference-related component of the natural language processing toolset, the volume also serves as a reference book on state-of-the-art methods in carrying out coreference projects for new languages and a tutorial for NLP practitioners. 
</para><para>Please cite: <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/ogr%3Aetal%3A15%3Agruyter"><inlinemediaobject><imageobject><imagedata fileref="http://core.ipipan.waw.pl/About?action=AttachFile&amp;do=get&amp;target=bibtex.png"/></imageobject><textobject><phrase>http://publications.nlp.ipipan.waw.pl/bibtex/ogr%3Aetal%3A15%3Agruyter</phrase></textobject></inlinemediaobject></ulink> </para></section><section><title>Earlier publications and project presentations</title><orderedlist numeration="arabic"><listitem><para><ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=poltal.pdf">Ogrodniczuk, Wójcicka, Głowińska, Kopeć – Nested Mention Detection for Polish Coreference Resolution</ulink></para><para><ulink url="http://poltal.ipipan.waw.pl/">PolTAL 2014</ulink> (September 17-19, 2014, Warsaw) conference paper describing the results of creating a shallow grammar of Polish capable of detecting multi-level nested nominal phrases, intended to be used as mentions in coreference resolution tasks. The work is based on existing grammar developed for the National Corpus of Polish and evaluated on manually annotated Polish Coreference Corpus.</para></listitem><listitem><para><ulink url="http://nlp.ipipan.waw.pl/Bib/zaw:ogro:14:cl.pdf">Zawisławska, Ogrodniczuk – The same or just much the same? Problems with coreference from the reader’s perspective</ulink></para><para>A Peter Lang book chapter and <ulink url="https://sites.google.com/site/coglin2012/">Cognitive Linguistics in the Year 2012</ulink> (September 17-18, 2012) conference paper presenting problems related to coreference annotation in the Polish Coreference corpus. There are three main causes of annotator errors: grammatical (e.g. the lack of an article system in Polish), semantic (the so-called co-extension, involving lexical relations between words) and cognitive (the annotators’ insufficient real-world knowledge about certain relationships). 
Apart from provided examples of different kinds of annotation problems, the paper analyzes how coreference relates to identity in extralinguistic reality and in discourse. It also discusses the distinction between coreference and anaphora, as well as dependence of coreference on specific properties of Polish grammar. It questions M. Recasens’ theory of near-identity and the need for its detailed classification.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/zaw%3Aogro%3A14%3Acl">BibTeX citation</ulink>, <ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=wroclaw-2012.pdf">conference presentation</ulink> and <ulink url="http://www.peterlang.com/index.cfm?event=cmp.ccc.seitenstruktur.detailseiten&amp;seitentyp=produkt&amp;pk=74449&amp;cid=462">the book link</ulink>.</para></listitem><listitem><para><ulink url="http://www.lrec-conf.org/proceedings/lrec2014/pdf/1088_Paper.pdf">Ogrodniczuk, Kopeć, Savary – Polish Coreference Corpus in Numbers</ulink> </para><para><ulink url="http://lrec2014.lrec-conf.org/en/">LREC 2014</ulink> (May 26-31, 2014, Reykjavik) conference paper on a preliminary interpretation of the occurrence of different types of linguistic constructs in the manually-annotated Polish Coreference Corpus by providing analyses of various statistical properties related to mentions, clusters and near-identity links. Among others, frequency of mentions, zero subjects and singleton clusters is presented, as well as the average mention and cluster size. We also show that some coreference clustering constraints, such as gender or number agreement, are frequently not valid in case of Polish. The need for lemmatization for automatic coreference resolution is supported by an empirical study. Correlation between cluster and mention count within a text is investigated, with short characteristics of outlier cases. 
We also examine this correlation in each of the 14 text domains present in the corpus and show that none of them has abnormal frequency of outlier texts regarding the cluster/mention ratio. Finally, we report on our negative experiences concerning the annotation of the near-identity relation. In the conclusion we put forward some guidelines for the future research in the area.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/ogro%3Aetal%3A14%3Alrec">BibTeX citation</ulink> and <ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=lrec-2014-plakat.pdf">conference poster</ulink>.</para></listitem><listitem><para><ulink url="http://www.aclweb.org/anthology/E14-4043">Kopeć – Zero subject detection for Polish</ulink></para><para><ulink url="http://eacl2014.org/">EACL 2014</ulink> (April 26-30, 2014, Gothenburg) paper on the first machine learning experiments on detection of null subjects in Polish. It emphasizes the role of zero subject detection as the part of mention detection – the initial step of end-to-end coreference resolution. Anaphora resolution is not studied in this article.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/kop%3A14%3Aeacl%3Ashort">BibTeX citation</ulink> and <ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=eacl-2014-prezentacja.pdf">EACL presentation</ulink>.</para></listitem><listitem><para><ulink url="http://www.aclweb.org/anthology/E14-2024">Kopeć – MMAX2 for coreference annotation</ulink></para><para>EACL 2014 demo session paper presenting major modifications in the MMAX2 manual annotation tool, which were implemented for the coreference annotation of Polish texts. 
Among other things, a new feature of adjudication is described, as well as some general insight into the manual annotation tool selection process for the natural language processing tasks.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/kop%3A14%3Aeacl%3Ademo">BibTeX citation</ulink>.</para></listitem><listitem><para><ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=ACIIDS-2014.pdf">Kopeć, Ogrodniczuk – Inter-Annotator Agreement in Coreference Annotation of Polish</ulink></para><para><ulink url="http://www.ic.kmitl.ac.th/aciids2014/">ACIIDS 2014</ulink> (April 7-9, 2014, Bangkok) conference paper discussing different methods of estimating the inter-annotator agreement in manual annotation of Polish coreference and proposing a new BLANC-based annotation agreement metric. The commonly used agreement indicators are calculated for mention detection, semantic head annotation, near-identity markup and coreference resolution.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/kop:ogro:14:aciids">BibTeX citation</ulink>, <ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=aciids-2014-plakat.pdf">ACIIDS 2014 poster</ulink> and <ulink url="http://link.springer.com/chapter/10.1007/978-3-319-05503-9_15">Springer link</ulink>.</para></listitem><listitem><para><ulink url="http://nlp.ipipan.waw.pl/NLP-SEMINAR/140127.pdf">Kopeć – Automatyczne wykrywanie podmiotu zerowego</ulink></para><para>Presentation at the Natural Language Processing Seminar (January 27, 2014, Warsaw) discussing automated null subject detection (in Polish).</para></listitem><listitem><para><ulink url="http://nlp.ipipan.waw.pl/Bib/ogro:etal:13:ltc.pdf">Ogrodniczuk, Głowińska, Kopeć, Savary, Zawisławska – Polish Coreference Corpus</ulink></para><para><ulink url="http://www.ltc.amu.edu.pl/">LTC 2013</ulink> (December 7-9, 2013, Poznań) conference paper describing the composition, annotation 
process and availability of the newly constructed Polish Coreference Corpus – a large Polish corpus of general nominal coreference. The tools used in the process and final linguistic representation formats are also presented.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/ogro:etal:13:ltc">BibTeX citation</ulink> and <ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=ltc-pcc-prezentacja.pdf">LTC 2013 presentation</ulink>.</para></listitem><listitem><para><ulink url="http://nlp.ipipan.waw.pl/Bib/nito:13:ltc.pdf">Nitoń – Evaluation of Uryupina’s coreference resolution features for Polish</ulink></para><para>LTC 2013 conference paper describing evaluation of a set of surface, syntactic and anaphoric features proposed in Uryupina 2007 and their usefulness for coreference resolution in Polish texts.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/nito:13:ltc">BibTeX citation</ulink> and <ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=ltc-niton-prezentacja.pdf">LTC 2013 presentation</ulink>.</para></listitem><listitem><para><ulink url="http://nlp.ipipan.waw.pl/Bib/ogro:13:mike.pdf">Ogrodniczuk – Discovery of Common Nominal Facts for Coreference Resolution: Proof of concept</ulink></para><para><ulink url="http://www.wikicfp.com/cfp/servlet/event.showcfp?eventid=30962&amp;copyownerid=51382">MIKE 2013</ulink> (December 18-20, 2013, Virudhunagar) conference paper reporting on the preliminary experiment aimed at verification whether extraction of nominal facts corresponding to world knowledge from both structured and unstructured data could be effectively performed and its results used as a source of pragmatic knowledge for coreference resolution in Polish. 
Being the proof-of-concept only, this approach is work in progress and is intended to be further validated in a full-scale project.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/ogro:13:mike">BibTeX citation</ulink>, <ulink url="http://link.springer.com/chapter/10.1007/978-3-319-03844-5_69">Springer LNCS link</ulink> and <ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=mike-prezentacja.pdf">MIKE 2013 presentation</ulink>.</para></listitem><listitem><para><ulink url="http://nlp.ipipan.waw.pl/Bib/ogro:etal:13:cnccl.pdf">Ogrodniczuk, Głowińska, Kopeć, Savary, Zawisławska – Interesting Linguistic Features in Coreference Annotation of a Highly Inflectional Language</ulink></para><para><ulink url="http://210.29.169.226/CNCCL2013/en/home.html">CCL 2013/NLP-NABD 2013</ulink> (October 10-12, 2013, Suzhou) conference paper reporting on linguistic features and decisions that we find vital in the process of annotation and resolution of coreference for highly inflectional languages. The presented results have been collected during preparation of a corpus of general direct nominal coreference of Polish. Starting from the notion of a mention, its borders and potential vs. actual referentiality, we discuss the problem of complete and near-identity, zero subjects and dominant expressions. 
We also present interesting linguistic cases influencing the coreference resolution such as the difference between semantic and syntactic heads or the phenomenon of coreference chains made of indefinite pronouns.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/ogro:etal:13:cnccl">BibTeX citation</ulink>, <ulink url="http://link.springer.com/chapter/10.1007/978-3-642-41491-6_10">Springer LNCS link</ulink> and <ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=cnccl.pdf">CNCCL 2013 poster</ulink>.</para></listitem><listitem><para><ulink url="http://nlp.ipipan.waw.pl/Bib/ogro:len:13:nldb.pdf">Ogrodniczuk, Lenart – A Multi-Purpose Online Toolset for NLP Applications</ulink></para><para><ulink url="http://nldb.csesalford.com/index.htm">NLDB 2013</ulink> (June 19-21, 2013, Manchester) conference paper presenting a new implementation of the multipurpose set of NLP tools for Polish, made available online in a common web service framework. The tool set comprises a morphological analyzer, a tagger, a named entity recognizer, a dependency parser, a constituency parser and a coreference resolver. Additionally, a web application offering chaining capabilities and a common BRAT-based presentation framework is presented.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/ogro:len:13:nldb">BibTeX citation</ulink> and <ulink url="http://link.springer.com/chapter/10.1007/978-3-642-38824-8_46">Springer LNCS link</ulink>.</para></listitem><listitem><para><ulink url="http://nlp.ipipan.waw.pl/Bib/ogro:13:iis.pdf">Ogrodniczuk – Translation- and projection-based unsupervised coreference resolution for Polish</ulink></para><para><ulink url="http://iis.ipipan.waw.pl/lpiis/">LP&amp;IIS 2013</ulink> (June 17-18, 2013, Warsaw) conference paper describing the test of the translation- and projection-based method of implementation of a coreference resolver for an inflectional language. 
The paper also presents evaluation of the result on a corpus of general coreference and compares the results with state-of-the-art solutions of this type for other languages.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/ogro:13:iis">BibTeX citation</ulink>, <ulink url="http://link.springer.com/chapter/10.1007/978-3-642-38634-3_14">Springer LNCS link</ulink> and <ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=iis-prezentacja.pdf">LP&amp;IIS 2013 presentation</ulink>.</para></listitem><listitem><para><ulink url="http://nlp.ipipan.waw.pl/Bib/ogro:etal:13:cicling.pdf">Ogrodniczuk, Zawisławska, Głowińska, Savary – Coreference annotation schema for an inflectional language</ulink></para><para><ulink url="http://www.cicling.org/2013/">CICLING 2013</ulink> (March 24–30, 2013, Samos) conference paper commenting on the experience gained in preparation of a coreference corpus for an inflectional and free-word-order language carried out in an ongoing project, aiming at creating tools for coreference resolution. 
Starting with a clarification of the relation between noun groups and mentions, through definition of the annotation scope and strategies, up to actual decisions for borderline cases, we present the process of building the first, to our best knowledge, corpus of general coreference of Polish.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/ogro:etal:13:cicling">BibTeX citation</ulink> and <ulink url="http://link.springer.com/chapter/10.1007%2F978-3-642-37247-6_32">Springer LNCS link</ulink>.</para></listitem><listitem><para><ulink url="http://nlp.ipipan.waw.pl/NLP-SEMINAR/121203.pdf">Ogrodniczuk, Głowińska, Zawisławska, Kopeć, Savary – Polski korpus koreferencyjny: wersja wstępna</ulink></para><para>Presentation at the Natural Language Processing Seminar (December 3, 2012, Warsaw) discussing methodology of the construction of the Polish Coreference Corpus (in Polish).</para></listitem><listitem><para><ulink url="http://hltdays.ipipan.waw.pl/pdf/lrt-for-polish-coreference-annotation-and-resolution.pdf">Kopeć, Ogrodniczuk – LRT for Polish coreference annotation and resolution</ulink></para><para><ulink url="http://hltdays.ipipan.waw.pl/">HLT Days 2012</ulink> (September 27-28, 2012, Warsaw) poster presented in Language Resources and Tools Hackathon session.</para></listitem><listitem><para><ulink url="http://nlp.ipipan.waw.pl/Bib/ogro%3Azaw%3A12%3Aki">Ogrodniczuk, Zawisławska – Semantic Approach to Identity in Coreference Resolution Task</ulink></para><para><ulink url="http://www.dfki.de/KI2012/">KI 2012</ulink> (September 24-27, 2012, Saarbrücken) conference paper confronting the idea of continuous nature of identity with experimental data for Polish, resulting in a new approach to this notion. It extends the definition of coreference with speaker/recipient relation, believed to be valid for all languages, and explains the near-identity with lexical and conceptual means. 
The theory is supported with Polish-English examples presenting difficulties in coreference interpretation.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/ogro%3Azaw%3A12%3Aki">BibTeX citation</ulink> and <ulink url="http://link.springer.com/chapter/10.1007/978-3-642-33347-7_23">Springer LNCS link</ulink>.</para></listitem><listitem><para><ulink url="http://www.lrec-conf.org/proceedings/lrec2012/pdf/1064_Paper.pdf">Kopeć, Ogrodniczuk – Creating a Coreference Resolution System for Polish</ulink></para><para><ulink url="http://www.lrec-conf.org/lrec2012/">LREC 2012</ulink> (May 21-27, 2012, Istanbul) conference paper presenting the results of the first attempt of the coreference resolution for Polish using statistical methods. It presents the conclusions from the process of adapting the Beautiful Anaphora Resolution Toolkit (BART; a system primarily designed for the English language) for Polish and collates its evaluation results with those of the previously implemented rule-based system. 
Finally, we describe our plans for the future usage of the tool and highlight the upcoming research to be conducted, such as the experiments of a larger scale and the comparison with other machine learning tools.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/kop%3Aogro%3A12%3Alrec">BibTeX citation</ulink> and <ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=lrec-2012-plakat.pdf">LREC 2012 poster</ulink>.</para></listitem><listitem><para><ulink url="http://nlp.ipipan.waw.pl/NLP-SEMINAR/120305.pdf">Ogrodniczuk, Głowińska, Zawisławska, Kopeć, Savary – Wstępna weryfikacja typologii i strategii anotacji koreferencji w tekstach polskich</ulink></para><para>Presentation at the Natural Language Processing Seminar (March 5, 2012, Warsaw) discussing typology of coreference and strategies of its annotation (in Polish).</para></listitem><listitem><para><ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=ltc-2011.pdf">Ogrodniczuk, Kopeć – End-to-end coreference resolution baseline system for Polish</ulink></para><para><ulink url="http://www.ltc.amu.edu.pl/a2011/">LTC 2011</ulink> (November 25-27, 2011, Poznań) conference paper presenting the results of the first attempt of coreference resolution for Polish, intended to create a useful baseline for future experiments with this topic. The resulting implementation is designed to run either on true mention boundaries (discovering coreference chains between them) or in an end-to-end manner, performing their automatic detection as the first step. The system uses a few rich rules, corresponding to syntactic constraints (elimination of nested nominal groups), syntactic filters (elimination of syntactic incompatible heads), semantic filters (wordnet-derived compatibility) and selection (weighted scoring). 
Results are evaluated against human annotation for two commonly used baseline variants of the resolver (all-singletons/head-match) and two target rule-based settings. The best working method is analysed, showing simple statistics about the two classes of errors made by the system.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/ogro%3Akop%3A11%3Altc">BibTeX citation</ulink> and <ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=ltc-2011-wystapienie.pdf">LTC 2011 presentation</ulink>.</para></listitem><listitem><para><ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=daarc-2011.pdf">Ogrodniczuk, Kopeć – Rule-based coreference resolution module for Polish</ulink></para><para><ulink url="http://daarc2011.clul.ul.pt/">DAARC 2011</ulink> (October 6-7, 2011, Faro) conference paper presenting the results of the first attempt at coreference resolution for Polish running on true mention boundaries and using a few rich rules, corresponding to syntactic constraints (elimination of nested nominal groups), syntactic filters (elimination of syntactically incompatible heads), semantic filters (wordnet-derived compatibility) and selection (weighted scoring). The results are compared to human annotation and presented in four sets: two common baselines (all-singletons/head-match) and two slightly more complex settings with four and five rules.</para><para>See <ulink url="http://publications.nlp.ipipan.waw.pl/bibtex/ogro%3Akop%3A11%3Adaarc">BibTeX citation</ulink> and <ulink url="http://core.ipipan.waw.pl/About/About?action=AttachFile&amp;do=get&amp;target=daarc-2011-plakat.pdf">DAARC 2011 poster</ulink>. 
</para></listitem></orderedlist></section><section><title>External contributions</title><para>Parts of the work described here were also contributed by other externally funded projects, carried out simultaneously with CORE: </para><itemizedlist><listitem><para>works on the new version of the Polish grammar for Spejd by Alicja Wójcicka and Katarzyna Głowińska were co-funded by the Polish Ministry of Science and Higher Education as an Investment in CLARINPL Research Infrastructure and by the European Union from resources of the European Social Fund </para></listitem><listitem><para>works related to linguistic evaluation of usefulness of Uryupina’s coreference features for Polish by Piotr Batko and development of adaptation of BART (Beautiful Anaphora Resolution Toolkit) for Polish by Bartłomiej Nitoń were co-funded by the European Union from financial resources of the European Social Fund, project PO KL <emphasis>Information technologies: Research and their interdisciplinary applications</emphasis> </para></listitem><listitem><para>works related to coreference-based approach to summarization were carried out within <ulink url="http://phd.ipipan.waw.pl/">PhD studies</ulink> of Mateusz Kopeć at the Institute of Computer Science, Polish Academy of Sciences </para></listitem><listitem><para>help with adaptation of coreference tools to <ulink url="http://multiservice.nlp.ipipan.waw.pl/">Multiservice</ulink>, a Web service framework for Polish NLP tools, was offered by Michał Lenart taking part in <ulink url="http://www.metanet.eu/projects/cesar">CESAR project</ulink> (Central and South-east European Resources, part of META-NET) financed from a European Competitiveness and Innovation framework Programme, Information and Communication Technologies Policy Support Programme (CIP ICT-PSP, grant agreement 271022) </para></listitem><listitem><para>projection-based experiments were made possible by the <ulink url="http://research.google.com/university/translate/">University 
Research Program for Google Translate</ulink> </para></listitem><listitem><para>contacts established with the parallel French coreference annotation project <ulink url="http://tln.li.univ-tours.fr/Tln_Ancor.html">ANCOR</ulink> were also beneficial for some of our scientific results and helped relate the CORE project more deeply to the international coreference community.  </para></listitem></itemizedlist></section><section><title>Project team</title><para>The core CORE project team consisted of (almost alphabetically): </para><itemizedlist><listitem><para><ulink url="http://zil.ipipan.waw.pl/MaciejOgrodniczuk">Maciej Ogrodniczuk</ulink> — principal investigator </para></listitem><listitem><para>Barbara Dunin-Kęplicz — formalization of coreference rules </para></listitem><listitem><para>Maria Głąbska — coreference annotation </para></listitem><listitem><para>Katarzyna Głowińska — linguistic expertise related to anaphora, coreference and Polish syntax </para></listitem><listitem><para>Anna Grzeszak — coreference annotation </para></listitem><listitem><para>Mateusz Kopeć — technical leadership, implementation and IT design, development of the annotation environment and project tools </para></listitem><listitem><para>Emilia Kubicka — coreference annotation </para></listitem><listitem><para>Barbara Masny — coreference annotation </para></listitem><listitem><para>Paulina Rosalska — coreference annotation </para></listitem><listitem><para>Agata Savary — coreference annotation and annotation work expertise </para></listitem><listitem><para>Magdalena Zawisławska — linguistic and semantic expertise, annotation management, adjudication of the annotation of Polish Coreference Corpus </para></listitem><listitem><para>Sebastian Żurowski — coreference annotation </para></listitem></itemizedlist><para>but there were numerous other people, mainly colleagues from the <ulink url="http://zil.ipipan.waw.pl/">Linguistic Engineering Group at the Institute of Computer Science, Polish 
Academy of Sciences</ulink>, who contributed to various stages of the project with their selfless help: </para><itemizedlist><listitem><para>Piotr Batko — coreference annotation, verification of coreference features for Polish (linguistic part) </para></listitem><listitem><para>Łukasz Degórski — help related to processing NKJP data </para></listitem><listitem><para>Łukasz Dębowski — statistical expertise </para></listitem><listitem><para>Michał Lenart — help related to processing NKJP data, hardware expertise, Multiservice integration assistance </para></listitem><listitem><para>Małgorzata Marciniak — HPSG anaphora expertise </para></listitem><listitem><para>Bartłomiej Nitoń — verification of coreference features for Polish (implementation part) </para></listitem><listitem><para>Adam Przepiórkowski — linguistic and natural language processing expertise, management of co-operation with the National Corpus of Polish </para></listitem><listitem><para>Filip Skwarski — translation and proofreading </para></listitem><listitem><para>Jakub Waszczuk — expertise related to annotation and named entity-related tools, versioning system management </para></listitem><listitem><para>Joanna Wierucka — translation and proofreading. </para></listitem></itemizedlist></section></section></article>