% Bibliography entries in the biblatex data model (date, journaltitle, langid).
% For journal articles, "volume" holds the bound volume and "number" the issue
% within that volume. Accented letters use the braced form, e.g. {\"o}.

@article{sparckjones_1972_statistical,
  author       = {Sp{\"a}rck Jones, Karen},
  title        = {A Statistical Interpretation of Term Specificity and Its Application in Retrieval},
  journaltitle = {Journal of Documentation},
  volume       = {28},
  number       = {1},
  pages        = {11--21},
  date         = {1972},
  url          = {https://dl.acm.org/doi/10.5555/106765.106782},
  urldate      = {2025-10-14},
}

@article{kilgarriff_2001_comparing,
  author       = {Kilgarriff, Adam},
  title        = {Comparing {{Corpora}}},
  journaltitle = {International Journal of Corpus Linguistics},
  volume       = {6},
  number       = {1},
  pages        = {97--133},
  date         = {2001},
  doi          = {10.1075/ijcl.6.1.05kil},
  langid       = {english},
}

% Volume corrected from 23 to 32: this journal is vol. 33 for 2018 (see
% weidman_2018_limits), so 23 was a digit transposition for the 2017 volume.
@article{evert_2017_understanding,
  author       = {Evert, Stefan and Jannidis, Fotis and Proisl, Thomas and Pielstr{\"o}m, Steffen and Vitt, Thorsten and Sch{\"o}ch, Christof and Reger, Isabella},
  title        = {Understanding and {{Explaining Distance Measures}} for {{Authorship Attribution}}},
  journaltitle = {Digital Scholarship in the Humanities},
  volume       = {32},
  number       = {suppl\_2},
  date         = {2017},
  doi          = {10.1093/llc/fqx023},
  langid       = {english},
}

@article{lijffijt_2014_significance,
  author       = {Lijffijt, Jefrey and Nevalainen, Terttu and S{\"a}ily, Tanja and Papapetrou, Panagiotis and Puolam{\"a}ki, Kai and Mannila, Heikki},
  title        = {Significance Testing of Word Frequencies in Corpora},
  journaltitle = {Digital Scholarship in the Humanities},
  volume       = {31},
  number       = {2},
  pages        = {374--397},
  date         = {2014},
  doi          = {10.1093/llc/fqu064},
  langid       = {english},
}

@incollection{paquot_2009_distinctive,
  author       = {Paquot, Magali and Bestgen, Yves},
  title        = {Distinctive Words in Academic Writing: {{A}} Comparison of Three Statistical Tests for Keyword Extraction},
  editor       = {Jucker, Andreas H. and Schreier, Daniel and Hundt, Marianne},
  booktitle    = {Corpora: {{Pragmatics}} and {{Discourse}}},
  publisher    = {Brill {\textbar} Rodopi},
  pages        = {247--269},
  date         = {2009},
  doi          = {10.1163/9789042029101_014},
}

@article{egbert_2019_incorporating,
  author       = {Egbert, Jesse and Biber, Doug},
  title        = {Incorporating Text Dispersion into Keyword Analyses},
  journaltitle = {Corpora},
  volume       = {14},
  number       = {1},
  pages        = {77--104},
  date         = {2019},
  doi          = {10.3366/cor.2019.0162},
}

@article{schroter_2021_keyness,
  author       = {Schr{\"o}ter, Julian and Du, Keli and Dudar, Julia and Rok, Cora and Sch{\"o}ch, Christof},
  title        = {From {{Keyness}} to {{Distinctiveness}} -- {{Triangulation}} and {{Evaluation}} in {{Computational Literary Studies}}},
  journaltitle = {Journal of Literary Theory},
  volume       = {15},
  number       = {1--2},
  pages        = {81--108},
  date         = {2021},
  doi          = {10.1515/jlt-2021-2011},
  langid       = {english},
}

@inproceedings{du_2021_zeta,
  author       = {Du, Keli and Dudar, Julia and Rok, Cora and Sch{\"o}ch, Christof},
  title        = {Zeta \& {{Eta}}: {{An Exploration}} and {{Evaluation}} of {{Two Dispersion-based Measures}} of {{Distinctiveness}}},
  editor       = {Ehrmann, Maud and Karsdorp, Folgert and Wevers, Melvin and Andrews, Tara Lee and Burghardt, Manuel and Kestemont, Mike and Manjavacas, Enrique and Piotrowski, Michael and van Zundert, Joris},
  booktitle    = {Proceedings of Computational Humanities Research 2021},
  pages        = {181--194},
  date         = {2021},
  url          = {http://ceur-ws.org/Vol-2989/short_paper11.pdf},
  urldate      = {2025-10-14},
}

@article{du_2022_evaluation,
  author       = {Du, Keli and Dudar, Julia and Sch{\"o}ch, Christof},
  title        = {Evaluation of Measures of Distinctiveness: {{Classification}} of Literary Texts on the Basis of Distinctive Words},
  journaltitle = {Journal of Computational Literary Studies},
  volume       = {1},
  number       = {1},
  date         = {2022},
  doi          = {10.48694/jcls.102},
  langid       = {english},
}

@article{sonning_2023_evaluation,
  author       = {S{\"o}nning, Lukas},
  title        = {Evaluation of Keyness Metrics: Performance and Reliability},
  journaltitle = {Corpus Linguistics and Linguistic Theory},
  volume       = {20},
  number       = {2},
  pages        = {263--288},
  date         = {2023},
  doi          = {10.1515/cllt-2022-0116},
  langid       = {english},
}

@article{gries_2022_what,
  author       = {Gries, Stefan Th.},
  title        = {What Do (Most of) Our Dispersion Measures Measure (Most)? {{Dispersion}}?},
  journaltitle = {Journal of Second Language Studies},
  volume       = {5},
  number       = {2},
  pages        = {171--205},
  date         = {2022},
  doi          = {10.1075/jsls.21029.gri},
  langid       = {english},
}

@software{du_2021_pydistinto,
  author       = {Du, Keli and Dudar, Julia and Sch{\"o}ch, Christof},
  title        = {Pydistinto - a {{Python}} Implementation of Different Measures of Distinctiveness for Contrastive Text Analysis},
  organization = {Zenodo},
  version      = {v0.1.1},
  date         = {2021},
  doi          = {10.5281/zenodo.5245096},
}

@article{argamon_2007_interpreting,
  author       = {Argamon, Shlomo},
  title        = {Interpreting {{Burrows}}'s {{Delta}}: {{Geometric}} and {{Probabilistic Foundations}}},
  journaltitle = {Literary and Linguistic Computing},
  volume       = {23},
  number       = {2},
  pages        = {131--147},
  date         = {2007},
  doi          = {10.1093/llc/fqn003},
  langid       = {english},
}

@article{culpeper_2009_keyness,
  author       = {Culpeper, Jonathan},
  title        = {Keyness: {{Words}}, Parts-of-Speech and Semantic Categories in the Character-Talk of {{Shakespeare}}'s {{{\emph{Romeo}}}}{\emph{ and }}{{{\emph{Juliet}}}}},
  journaltitle = {International Journal of Corpus Linguistics},
  volume       = {14},
  number       = {1},
  pages        = {29--59},
  date         = {2009},
  doi          = {10.1075/ijcl.14.1.03cul},
  langid       = {english},
}

@article{gonon_2018_motifs,
  author       = {Gonon, Laetitia and Goossens, Vannina and Kraif, Olivier and Novakova, Iva and Sorba, Julie},
  title        = {Motifs Textuels Sp{\'e}cifiques Au Genre Policier et {\`a} La Litt{\'e}rature Blanche},
  editor       = {Neveu, Franck and Harmegnies, Bernard and Hriba, Linda and Pr{\'e}vost, Sophie},
  journaltitle = {6$^{e}$ Congr{\`e}s Mondial de Linguistique Fran{\c{c}}aise, SHS Web of Conferences},
  volume       = {46},
  date         = {2018},
  doi          = {10.1051/shsconf/20184606007},
}

@incollection{schoech_2018_zeta,
  author       = {Sch{\"o}ch, Christof},
  title        = {{Zeta f{\"u}r die kontrastive Analyse literarischer Texte. Theorie, Implementierung, Fallstudie}},
  editor       = {Bernhart, Toni and Richter, Sandra and Lepper, Marcus and Willand, Marcus and Albrecht, Andrea},
  booktitle    = {{Quantitative Ans{\"a}tze in den Literatur- und Geisteswissenschaften. Systematische und historische Perspektiven}},
  publisher    = {De Gruyter},
  pages        = {77--94},
  date         = {2018},
  doi          = {10.1515/9783110523300-004},
  langid       = {german},
}

@article{weidman_2018_limits,
  author       = {Weidman, Sean G. and O'Sullivan, James},
  title        = {The Limits of Distinctive Words: {{Re-evaluating}} Literature's Gender Marker Debate},
  journaltitle = {Digital Scholarship in the Humanities},
  volume       = {33},
  number       = {2},
  pages        = {374--390},
  date         = {2018},
  doi          = {10.1093/llc/fqx017},
  langid       = {english},
}

@article{burrows_2007_all,
  author       = {Burrows, John},
  title        = {All the {{Way Through}}: {{Testing}} for {{Authorship}} in {{Different Frequency Strata}}},
  journaltitle = {Literary and Linguistic Computing},
  volume       = {22},
  number       = {1},
  pages        = {27--47},
  date         = {2007},
  doi          = {10.1093/llc/fqi067},
  langid       = {english},
}

@inproceedings{schoch_2018_burrows,
  author       = {Sch{\"o}ch, Christof and Schl{\"o}r, Daniel and Zehe, Albin and Gebhard, Henning and Becker, Martin and Hotho, Andreas},
  title        = {Burrows' {Zeta}: {{Exploring}} and {{Evaluating Variants}} and {{Parameters}}},
  editor       = {Gir{\'o}n Palau, Jonathan and Galina Russell, Isabel},
  booktitle    = {Book of {{Abstracts}} of the {{Digital Humanities Conference 2018}}},
  publisher    = {ADHO},
  date         = {2018},
  url          = {https://dh2018.adho.org/en/burrows-zeta-exploring-and-evaluating-variants-and-parameters/},
  urldate      = {2025-10-16},
  langid       = {english},
}

@article{welch_1947_generalization,
  author       = {Welch, Bernard Lewis},
  title        = {The Generalization of {{Student}}'s Problem When Several Different Population Variances Are Involved},
  journaltitle = {Biometrika},
  volume       = {34},
  number       = {1--2},
  pages        = {28--35},
  date         = {1947},
  doi          = {10.1093/biomet/34.1-2.28},
  langid       = {english},
}
% Bibliography entries in the biblatex data model (date, journaltitle, langid).
% For journal articles, "volume" holds the bound volume and "number" the issue
% within that volume. Accented letters use the braced form, e.g. {\'a}.

@article{wilcoxon_1945_individual,
  author       = {Wilcoxon, Frank},
  title        = {Individual {{Comparisons}} by {{Ranking Methods}}},
  journaltitle = {Biometrics Bulletin},
  volume       = {1},
  number       = {6},
  date         = {1945},
  doi          = {10.2307/3001968},
}

@article{mann_1947_test,
  author       = {Mann, H. B. and Whitney, D. R.},
  title        = {On a {{Test}} of {{Whether}} One of {{Two Random Variables}} Is {{Stochastically Larger}} than the {{Other}}},
  journaltitle = {The Annals of Mathematical Statistics},
  volume       = {18},
  number       = {1},
  pages        = {50--60},
  date         = {1947},
  doi          = {10.1214/aoms/1177730491},
  langid       = {english},
}

@incollection{gries_2010_useful,
  author       = {Gries, Stefan Th.},
  title        = {Useful Statistics for Corpus Linguistics},
  editor       = {S{\'a}nchez, Aquilino and Almela, Mois{\'e}s},
  booktitle    = {A Mosaic of Corpus Linguistics: Selected Approaches},
  publisher    = {Peter Lang},
  pages        = {269--291},
  date         = {2010},
  langid       = {english},
}

@article{plackett_1983_karl,
  author       = {Plackett, Robin L.},
  title        = {Karl {{Pearson}} and the {{Chi-Squared Test}}},
  journaltitle = {International Statistical Review / Revue Internationale de Statistique},
  volume       = {51},
  number       = {1},
  date         = {1983},
  doi          = {10.2307/1402731},
}

@article{dunning_1993_accurate,
  author       = {Dunning, Ted},
  title        = {Accurate {{Methods}} for the {{Statistics}} of {{Surprise}} and {{Coincidence}}},
  journaltitle = {Computational Linguistics},
  volume       = {19},
  number       = {1},
  pages        = {61--74},
  date         = {1993},
  url          = {http://aclweb.org/anthology/J93-1003},
  urldate      = {2025-10-16},
  langid       = {english},
}

% Zenodo software record: typed as software with Zenodo as the organization
% rather than as a journal article.
@software{honnibal_2020_industrial,
  author       = {Honnibal, Matthew and Montani, Ines and Van Landeghem, Sofie and Boyd, Adriane},
  title        = {{{spaCy}}: Industrial-strength Natural Language Processing in {{Python}}},
  organization = {Zenodo},
  date         = {2020},
  doi          = {10.5281/zenodo.1212303},
}