%%% -*-BibTeX-*- %%% ==================================================================== %%% BibTeX-file{ %%% author = "Nelson H. F. Beebe", %%% version = "1.47", %%% date = "30 January 2026", %%% time = "09:48:50 MDT", %%% filename = "tallip.bib", %%% address = "University of Utah %%% Department of Mathematics, 110 LCB %%% 155 S 1400 E RM 233 %%% Salt Lake City, UT 84112-0090 %%% USA", %%% telephone = "+1 801 581 5254", %%% URL = "https://www.math.utah.edu/~beebe", %%% checksum = "33222 35584 172013 1653288", %%% email = "beebe at math.utah.edu, beebe at acm.org, %%% beebe at computer.org (Internet)", %%% codetable = "ISO/ASCII", %%% keywords = "ACM Transactions on Asian and Low-Resource %%% Language Information Processing (TALLIP); %%% bibliography; BibTeX; TALLIP", %%% license = "public domain", %%% supported = "yes", %%% docstring = "This is a COMPLETE BibTeX bibliography for %%% ACM Transactions on Asian and Low-Resource %%% Language Information Processing (TALLIP) %%% (CODEN none, ISSN 2375-4699 (print), %%% 2375-4702 (electronic)). Publication began %%% with volume 14, number 1, in 2015 as a %%% continuation of the predecessor journal, %%% ACM Transactions on Asian language %%% information processing (TALIP), which is %%% covered in a separate bibliography, talip.bib. %%% %%% The journal has World Wide Web sites at %%% %%% https://dl.acm.org/journal/tallip %%% https://dl.acm.org/loi/tallip %%% %%% At version 1.47, the year coverage looked %%% like this: %%% %%% 2015 ( 19) 2019 ( 48) 2023 ( 242) %%% 2016 ( 43) 2020 ( 88) 2024 ( 172) %%% 2017 ( 23) 2021 ( 109) 2025 ( 143) %%% 2018 ( 27) 2022 ( 130) 2026 ( 7) %%% %%% Article: 1051 %%% %%% Total entries: 1051 %%% %%% This bibliography has been constructed %%% primarily from the publisher Web site. %%% %%% Numerous errors in the sources noted above %%% have been corrected. 
Spelling has been %%% verified with the UNIX spell and GNU ispell %%% programs using the exception dictionary %%% stored in the companion file with extension %%% .sok. %%% %%% BibTeX citation tags are uniformly chosen as %%% name:year:abbrev, where name is the family %%% name of the first author or editor, year is a %%% 4-digit number, and abbrev is a 3-letter %%% condensation of important title words. %%% Citation labels were automatically generated %%% by software developed for the BibNet Project. %%% %%% In this bibliography, entries are sorted in %%% publication order, with the help of %%% ``bibsort -byvolume''. The bibsort utility %%% is available from ftp.math.utah.edu in %%% /pub/tex/bib. %%% %%% The checksum field above contains a CRC-16 %%% checksum as the first value, followed by the %%% equivalent of the standard UNIX wc (word %%% count) utility output of lines, words, and %%% characters. This is produced by Robert %%% Solovay's checksum utility.", %%% }
%%% ====================================================================
%%% LaTeX code emitted ahead of the bibliography (an empty hyphenation
%%% exception list).
@Preamble{
    "\hyphenation{ }"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:
@String{ack-nhfb = "Nelson H. F. Beebe, University of Utah, Department of Mathematics, 110 LCB, 155 S 1400 E RM 233, Salt Lake City, UT 84112-0090, USA, Tel: +1 801 581 5254, e-mail: \path|beebe@math.utah.edu|, \path|beebe@acm.org|, \path|beebe@computer.org| (Internet), URL: \path|https://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:
@String{j-TALLIP = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)"}

%%% ====================================================================
%%% Bibliography entries:

%%% TALLIP volume 14, number 1 (January 2015), article 1.
@Article{Uematsu:2015:IMD,
  author =       "Sumire Uematsu and Takuya Matsuzaki and Hiroki Hanaoka and Yusuke Miyao and Hideki Mima",
  title =        "Integrating Multiple Dependency Corpora for Inducing Wide-Coverage {Japanese} {CCG} Resources",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2658997",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:48 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "A novel method to induce wide-coverage Combinatory Categorial Grammar (CCG) resources for Japanese is proposed in this article. For some languages including English, the availability of large annotated corpora and the development of data-based induction of lexicalized grammar have enabled deep parsing, i.e., parsing based on lexicalized grammars. However, deep parsing for Japanese has not been widely studied. This is mainly because most Japanese syntactic resources are represented in chunk-based dependency structures, while previous methods for inducing grammars are dependent on tree corpora. To translate syntactic information presented in chunk-based dependencies to phrase structures as accurately as possible, integration of annotation from multiple dependency-based corpora is proposed. Our method first integrates dependency structures and predicate-argument information and converts them into phrase structure trees. The trees are then transformed into CCG derivations in a similar way to previously proposed methods. The quality of the conversion is empirically evaluated in terms of the coverage of the obtained CCG lexicon and the accuracy of the parsing with the grammar. While the transforming process used in this study is specialized for Japanese, the framework of our method would be applicable to other languages for which dependency-based analysis has been regarded as more appropriate than phrase structure-based analysis due to morphosyntactic features.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Ramrakhiyani:2015:ATE, author = "Nitin Ramrakhiyani and Prasenjit Majumder", title = "Approaches to Temporal Expression Recognition in {Hindi}", journal = j-TALLIP, volume = "14", number = "1", pages = "2:1--2:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2629574", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:48 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Temporal annotation of plain text is considered a useful component of modern information retrieval tasks. In this work, different approaches for identification and classification of temporal expressions in Hindi are developed and analyzed. First, a rule-based approach is developed, which takes plain text as input and based on a set of hand-crafted rules, produces a tagged output with identified temporal expressions. This approach performs with a strict F1-measure of 0.83. 
In another approach, a CRF-based classifier is trained with human tagged data and is then tested on a test dataset. The trained classifier identifies the time expressions from plain text and further classifies them to various classes. This approach performs with a strict F1-measure of 0.78. Next, the CRF is replaced by an SVM-based classifier and the same experiment is performed with the same features. This approach is shown to be comparable to the CRF and performs with a strict F1-measure of 0.77. Using the rule base information as an additional feature enhances the performances to 0.86 and 0.84 for the CRF and SVM respectively. With three different comparable systems performing the extraction task, merging them to take advantage of their positives is the next step. As the first merge experiment, rule-based tagged data is fed to the CRF and SVM classifiers as additional training data. Evaluation results report an increase in F1-measure of the CRF from 0.78 to 0.8. Second, a voting-based approach is implemented, which chooses the best class for each token from the outputs of the three approaches. This approach results in the best performance for this task with a strict F1-measure of 0.88. In this process a reusable gold standard dataset for temporal tagging in Hindi is also developed. Named the ILTIMEX2012 corpus, it consists of 300 manually tagged Hindi news documents.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "2", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", }

%%% TALLIP volume 14, number 1 (January 2015), article 3.
@Article{Kumari:2015:ITD,
  author =       "B. Venkata Seshu Kumari and Ramisetty Rajeshwara Rao",
  title =        "Improving {Telugu} Dependency Parsing using Combinatory Categorial Grammar Supertags",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2693190.2693191",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:48 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "We show that Combinatory Categorial Grammar (CCG) supertags can improve Telugu dependency parsing. In this process, we first extract a CCG lexicon from the dependency treebank. Using both the CCG lexicon and the dependency treebank, we create a CCG treebank using a chart parser. Exploring different morphological features of Telugu, we develop a supertagger using maximum entropy models. We provide CCG supertags as features to the Telugu dependency parser (MST parser). We get an improvement of 1.8\% in the unlabelled attachment score and 2.2\% in the labelled attachment score. Our results show that CCG supertags improve the MST parser, especially on verbal arguments for which it has weak rates of recovery.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Ketui:2015:EBA, author = "Nongnuch Ketui and Thanaruk Theeramunkong and Chutamanee Onsuwan", title = "An {EDU}-Based Approach for {Thai} Multi-Document Summarization and Its Application", journal = j-TALLIP, volume = "14", number = "1", pages = "4:1--4:??", month = jan, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2641567", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:48 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Due to lack of a word/phrase/sentence boundary, summarization of Thai multiple documents has several challenges in unit segmentation, unit selection, duplication elimination, and evaluation dataset construction. In this article, we introduce Thai Elementary Discourse Units (TEDUs) and their derivatives, called Combined TEDUs (CTEDUs), and then present our three-stage method of Thai multi-document summarization, that is, unit segmentation, unit-graph formulation, and unit selection and summary generation. To examine performance of our proposed method, a number of experiments are conducted using 50 sets of Thai news articles with their manually constructed reference summaries. Based on measures of ROUGE-1, ROUGE-2, and ROUGE-SU4, the experimental results show that: (1) the TEDU-based summarization outperforms paragraph-based summarization; (2) our proposed graph-based TEDU weighting with importance-based selection achieves the best performance; and (3) unit duplication consideration and weight recalculation help improve summary quality.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "4", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", }

%%% TALLIP volume 14, number 1 (January 2015), article 5 (no abstract field).
@Article{Sproat:2015:TPE,
  author =       "Richard Sproat",
  title =        "{TALLIP} Perspectives: Editorial Commentary: The Broadened Focus of the Journal",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2710043",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:48 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Shen:2015:MGA, author = "Han-ping Shen and Chung-hsien Wu and Pei-shan Tsai", title = "Model Generation of Accented Speech using Model Transformation and Verification for Bilingual Speech Recognition", journal = j-TALLIP, volume = "14", number = "2", pages = "6:1--6:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2661637", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:49 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Nowadays, bilingual or multilingual speech recognition is confronted with the accent-related problem caused by non-native speech in a variety of real-world applications. Accent modeling of non-native speech is definitely challenging, because the acoustic properties in highly-accented speech pronounced by non-native speakers are quite divergent. The aim of this study is to generate highly Mandarin-accented English models for speakers whose mother tongue is Mandarin. 
First, a two-stage, state-based verification method is proposed to extract the state-level, highly-accented speech segments automatically. Acoustic features and articulatory features are successively used for robust verification of the extracted speech segments. Second, Gaussian components of the highly-accented speech models are generated from the corresponding Gaussian components of the native speech models using a linear transformation function. A decision tree is constructed to categorize the transformation functions and used for transformation function retrieval to deal with the data sparseness problem. Third, a discrimination function is further applied to verify the generated accented acoustic models. Finally, the successfully verified accented English models are integrated into the native bilingual phone model set for Mandarin-English bilingual speech recognition. Experimental results show that the proposed approach can effectively alleviate recognition performance degradation due to accents and can obtain absolute improvements of 4.1\%, 1.8\%, and 2.7\% in word accuracy for bilingual speech recognition compared to that using traditional ASR approaches, MAP-adapted, and MLLR-adapted ASR methods, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "6", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", }

%%% TALLIP volume 14, number 2 (March 2015), article 7.
@Article{Awajan:2015:KEA,
  author =       "Arafat Awajan",
  title =        "Keyword Extraction from {Arabic} Documents using Term Equivalence Classes",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "2",
  pages =        "7:1--7:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2665077",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "The rapid growth of the Internet and other computing facilities in recent years has resulted in the creation of a large amount of text in electronic form, which has increased the interest in and importance of different automatic text processing applications, including keyword extraction and term indexing. Although keywords are very useful for many applications, most documents available online are not provided with keywords. We describe a method for extracting keywords from Arabic documents. This method identifies the keywords by combining linguistics and statistical analysis of the text without using prior knowledge from its domain or information from any related corpus. The text is preprocessed to extract the main linguistic information, such as the roots and morphological patterns of derivative words. A cleaning phase is then applied to eliminate the meaningless words from the text. The most frequent terms are clustered into equivalence classes in which the derivative words generated from the same root and the non-derivative words generated from the same stem are placed together, and their count is accumulated. A vector space model is then used to capture the most frequent N-gram in the text. Experiments carried out using a real-world dataset show that the proposed method achieves good results with an average precision of 31\% and average recall of 53\% when tested against manually assigned keywords.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Sundaram:2015:BLM, author = "Suresh Sundaram and A. G. Ramakrishnan", title = "Bigram Language Models and Reevaluation Strategy for Improved Recognition of Online Handwritten {Tamil} Words", journal = j-TALLIP, volume = "14", number = "2", pages = "8:1--8:??", month = mar, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2671014", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:49 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article describes a postprocessing strategy for online, handwritten, isolated Tamil words. Contributions have been made with regard to two issues hardly addressed in the online Indic word recognition literature, namely, use of (1) language models exploiting the idiosyncrasies of Indic scripts and (2) expert classifiers for the disambiguation of confused symbols. The input word is first segmented into its individual symbols, which are recognized using a primary support vector machine (SVM) classifier. Thereafter, we enhance the recognition accuracy by utilizing (i) a bigram language model at the symbol or character level and (ii) expert classifiers for reevaluating and disambiguating the different sets of confused symbols. The symbol-level bigram model is used in a traditional Viterbi framework. The concept of a character comprising multiple symbols is unique to Dravidian languages such as Tamil. 
This multi-symbol feature of Tamil characters has been exploited in proposing a novel, prefix-tree-based character-level bigram model that does not use Viterbi search; rather it reduces the search space for each input symbol based on its left context. For disambiguating confused symbols, a dynamic time-warping approach is proposed to automatically identify the parts of the online trace that discriminates between the confused classes. Fine classification of these regions by dedicated expert SVMs reduces the extent of confusions between such symbols. The integration of segmentation, prefix-tree-based language model and disambiguation of confused symbols is presented on a set of 15,000 handwritten isolated online Tamil words. Our results show recognition accuracies of 93.0\% and 81.6\% at the symbol and word level, respectively, as compared to the baseline classifier performance of 88.4\% and 65.1\%, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "8", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", }

%%% TALLIP volume 14, number 2 (March 2015), article 9.
@Article{Zhang:2015:TMT,
  author =       "Jiajun Zhang and Shujie Liu and Mu Li and Ming Zhou and Chengqing Zong",
  title =        "Towards Machine Translation in Semantic Vector Space",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "2",
  pages =        "9:1--9:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2699927",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "Measuring the quality of the translation rules and their composition is an essential issue in the conventional statistical machine translation (SMT) framework. To express the translation quality, the previous lexical and phrasal probabilities are calculated only according to the co-occurrence statistics in the bilingual corpus and may be not reliable due to the data sparseness problem. To address this issue, we propose measuring the quality of the translation rules and their composition in the semantic vector embedding space (VES). We present a recursive neural network (RNN)-based translation framework, which includes two submodels. One is the bilingually-constrained recursive auto-encoder, which is proposed to convert the lexical translation rules into compact real-valued vectors in the semantic VES. The other is a type-dependent recursive neural network, which is proposed to perform the decoding process by minimizing the semantic gap (meaning distance) between the source language string and its translation candidates at each state in a bottom-up structure. The RNN-based translation model is trained using a max-margin objective function that maximizes the margin between the reference translation and the n-best translations in forced decoding. In the experiments, we first show that the proposed vector representations for the translation rules are very reliable for application in translation modeling. We further show that the proposed type-dependent, RNN-based model can significantly improve the translation quality in the large-scale, end-to-end Chinese-to-English translation evaluation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Na:2015:CRF, author = "Seung-Hoon Na", title = "Conditional Random Fields for {Korean} Morpheme Segmentation and {POS} Tagging", journal = j-TALLIP, volume = "14", number = "3", pages = "10:1--10:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2700051", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:49 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "There has been recent interest in statistical approaches to Korean morphological analysis. However, previous studies have been based mostly on generative models, including a hidden Markov model (HMM), without utilizing discriminative models such as a conditional random field (CRF). We present a two-stage discriminative approach based on CRFs for Korean morphological analysis. Similar to methods used for Chinese, we perform two disambiguation procedures based on CRFs: (1) morpheme segmentation and (2) POS tagging. In morpheme segmentation, an input sentence is segmented into sequences of morphemes, where a morpheme unit is either atomic or compound. In the POS tagging procedure, each morpheme (atomic or compound) is assigned a POS tag. Once POS tagging is complete, we carry out a post-processing of the compound morphemes, where each compound morpheme is further decomposed into atomic morphemes, which is based on pre-analyzed patterns and generalized HMMs obtained from the given tagged corpus. Experimental results show the promise of our proposed method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "10", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", }

%%% TALLIP volume 14, number 3 (June 2015), article 11.
%%% The abstract's compounds "document-aligned", "topic-aligned" and
%%% "sentence-aligned" are written without a space before the hyphen,
%%% matching the "topic-aligned" spelling used later in the same abstract.
@Article{Liu:2015:MTM,
  author =       "Xiaodong Liu and Kevin Duh and Yuji Matsumoto",
  title =        "Multilingual Topic Models for Bilingual Dictionary Extraction",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "3",
  pages =        "11:1--11:??",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2699939",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "A machine-readable bilingual dictionary plays a crucial role in many natural language processing tasks, such as statistical machine translation and cross-language information retrieval. In this article, we propose a framework for extracting a bilingual dictionary from comparable corpora by exploiting a novel combination of topic modeling and word aligners such as the IBM models. Using a multilingual topic model, we first convert a comparable document-aligned corpus into a parallel topic-aligned corpus. This novel topic-aligned corpus is similar in structure to the sentence-aligned corpus frequently employed in statistical machine translation and allows us to extract a bilingual dictionary using a word alignment model. The main advantages of our framework is that (1) no seed dictionary is necessary for bootstrapping the process, and (2) multilingual comparable corpora in more than two languages can also be exploited. In our experiments on a large-scale Wikipedia dataset, we demonstrate that our approach can extract higher precision dictionaries compared to previous approaches and that our method improves further as we add more languages to the dataset.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Li:2015:UMS, author = "Xiaoqing Li and Chengqing Zong and Keh-yih Su", title = "A Unified Model for Solving the {OOV} Problem of {Chinese} Word Segmentation", journal = j-TALLIP, volume = "14", number = "3", pages = "12:1--12:??", month = jun, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2699940", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:49 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article proposes a unified, character-based, generative model to incorporate additional resources for solving the out-of-vocabulary (OOV) problem of Chinese word segmentation, within which different types of additional information can be utilized independently in corresponding submodels. This article mainly addresses the following three types of OOV: unseen dictionary words, named entities, and suffix-derived words, none of which are handled well by current approaches. The results show that our approach can effectively improve the performance of the first two types with positive interaction in F-score. Additionally, we also analyze reason that suffix information is not helpful. After integrating the proposed generative model with the corresponding discriminative approach, our evaluation on various corpora---including SIGHAN-2005, CIPS-SIGHAN-2010, and the Chinese Treebank (CTB)---shows that our integrated approach achieves the best performance reported in the literature on all testing sets when additional information and resources are allowed.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "12", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", }

%%% TALLIP volume 14, number 3 (June 2015), article 13.
@Article{Goto:2015:PUT,
  author =       "Isao Goto and Masao Utiyama and Eiichiro Sumita and Sadao Kurohashi",
  title =        "Preordering using a Target-Language Parser via Cross-Language Syntactic Projection for Statistical Machine Translation",
  journal =      j-TALLIP,
  volume =       "14",
  number =       "3",
  pages =        "13:1--13:??",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2699925",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:49 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "When translating between languages with widely different word orders, word reordering can present a major challenge. Although some word reordering methods do not employ source-language syntactic structures, such structures are inherently useful for word reordering. However, high-quality syntactic parsers are not available for many languages. We propose a preordering method using a target-language syntactic parser to process source-language syntactic structures without a source-language syntactic parser. To train our preordering model based on ITG, we produced syntactic constituent structures for source-language training sentences by (1) parsing target-language training sentences, (2) projecting constituent structures of the target-language sentences to the corresponding source-language sentences, (3) selecting parallel sentences with highly synchronized parallel structures, (4) producing probabilistic models for parsing using the projected partial structures and the Pitman-Yor process, and (5) parsing to produce full binary syntactic structures maximally synchronized with the corresponding target-language syntactic structures, using the constraints of the projected partial structures and the probabilistic models. Our ITG-based preordering model is trained using the produced binary syntactic structures and word alignments. The proposed method facilitates the learning of ITG by producing highly synchronized parallel syntactic structures based on cross-language syntactic projection and sentence selection. The preordering model jointly parses input sentences and identifies their reordered structures. Experiments with Japanese--English and Chinese--English patent translation indicate that our method outperforms existing methods, including string-to-tree syntax-based SMT, a preordering method that does not require a parser, and a preordering method that uses a source-language dependency parser.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Costa-Jussa:2016:DCS, author = "Marta R. Costa-Juss{\`a} and Jordi Centelles", title = "Description of the {Chinese}-to-{Spanish} Rule-Based Machine Translation System Developed Using a Hybrid Combination of Human Annotation and Statistical Techniques", journal = j-TALLIP, volume = "15", number = "1", pages = "1:1--1:??", month = jan, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2738045", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Two of the most popular Machine Translation (MT) paradigms are rule based (RBMT) and corpus based, which include the statistical systems (SMT). When scarce parallel corpus is available, RBMT becomes particularly attractive. This is the case of the Chinese--Spanish language pair. This article presents the first RBMT system for Chinese to Spanish. 
We describe a hybrid method for constructing this system taking advantage of available resources such as parallel corpora that are used to extract dictionaries and lexical and structural transfer rules. The final system is freely available online and open source. Although performance lags behind standard SMT systems for an in-domain test set, the results show that the RBMT's coverage is competitive and it outperforms the SMT system in an out-of-domain test set. This RBMT system is available to the general public, it can be further enhanced, and it opens up the possibility of creating future hybrid MT systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "1", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", }

%%% TALLIP volume 15, number 1 (January 2016), article 2.
@Article{Khanduja:2016:HFE,
  author =       "Deepti Khanduja and Neeta Nain and Subhash Panwar",
  title =        "A Hybrid Feature Extraction Algorithm for {Devanagari} Script",
  journal =      j-TALLIP,
  volume =       "15",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2710018",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Apr 3 08:15:50 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract =     "The efficiency of any character recognition technique is directly dependent on the accuracy of the generated feature set that could uniquely represent a character and hence correctly recognize it. This article proposes a hybrid approach combining the structural features of the character and a mathematical model of curve fitting to simulate the best features of a character. As a preprocessing step, skeletonization of the character is performed using an iterative thinning algorithm based on Raster scan of the character image. Then, a combination of structural features of the character like number of endpoints, loops, and intersection points is calculated. Further, the thinned character image is statistically zoned into partitions, and a quadratic curve-fitting model is applied on each partition forming a feature vector of the coefficients of the optimally fitted curve. This vector is combined with the spatial distribution of the foreground pixels for each zone and hence script-independent feature representation. The approach has been evaluated experimentally on Devanagari scripts. The algorithm achieves an average recognition accuracy of 93.4\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Shatnawi:2016:IHA, author = "Maad Shatnawi and Sherief Abdallah", title = "Improving Handwritten {Arabic} Character Recognition by Modeling Human Handwriting Distortions", journal = j-TALLIP, volume = "15", number = "1", pages = "3:1--3:??", month = jan, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2764456", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Handwritten Arabic character recognition systems face several challenges, including the unlimited variation in human handwriting and the unavailability of large public databases of handwritten characters and words. The use of synthetic data for training and testing handwritten character recognition systems is one of the possible solutions to provide several variations for these characters and to overcome the lack of large databases. While this can be using arbitrary distortions, such as image noise and randomized affine transformations, such distortions are not realistic. 
In this work, we model real distortions in handwriting using real handwritten Arabic character examples and then use these distortion models to synthesize handwritten examples that are more realistic. We show that the use of our proposed approach leads to significant improvements across different machine-learning classification algorithms.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "3", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wushouer:2016:CAP, author = "Mairidan Wushouer and Donghui Lin and Toru Ishida and Katsutoshi Hirayama", title = "A Constraint Approach to Pivot-Based Bilingual Dictionary Induction", journal = j-TALLIP, volume = "15", number = "1", pages = "4:1--4:??", month = jan, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2723144", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "High-quality bilingual dictionaries are very useful, but such resources are rarely available for lower-density language pairs, especially for those that are closely related. Using a third language to link two other languages is a well-known solution and usually requires only two input bilingual dictionaries A-B and B-C to automatically induce the new one, A-C. This approach, however, has never been demonstrated to utilize the complete structures of the input bilingual dictionaries, and this is a key failing because the dropped meanings negatively influence the result. This article proposes a constraint approach to pivot-based dictionary induction where language A and C are closely related. 
We create constraints from language similarity and model the structures of the input dictionaries as a Boolean optimization problem, which is then formulated within the Weighted Partial Max-SAT framework, an extension of Boolean Satisfiability (SAT). All of the encoded CNF (Conjunctive Normal Form), the predominant input language of modern SAT/MAX-SAT solvers, formulas are evaluated by a solver to produce the target (output) bilingual dictionary. Moreover, we discuss alternative formalizations as a comparison study. We designed a tool that uses the Sat4j library as the default solver to implement our method and conducted an experiment in which the output bilingual dictionary achieved better quality than the baseline method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "4", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yeh:2016:SAI, author = "Jui-Feng Yeh", title = "Speech Act Identification Using Semantic Dependency Graphs with Probabilistic Context-Free Grammars", journal = j-TALLIP, volume = "15", number = "1", pages = "5:1--5:??", month = jan, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2786978", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "We propose an approach for identifying the speech acts of speakers' utterances in conversational spoken dialogue that involves using semantic dependency graphs with probabilistic context-free grammars (PCFGs). The semantic dependency graph based on the HowNet knowledge base is adopted to model the relationships between words in an utterance parsed by PCFG. Dependency relationships between words within the utterance are extracted by decomposing the semantic dependency graph according to predefined events. 
The corresponding values of semantic slots are subsequently extracted from the speaker's utterances according to the corresponding identified speech act. The experimental results obtained when using the proposed approach indicated that the accuracy rates of speech act detection and task completion were 95.6\% and 77.4\% for human-generated transcription (REF) and speech-to-text recognition output (STT), respectively, and the average numbers of turns of each dialogue were 8.3 and 11.8 for REF and STT, respectively. Compared with Bayes classifier, partial pattern tree, and Bayesian-network-based approaches, we obtained 14.1\%, 9.2\%, and 3\% improvements in the accuracy of speech act identification, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "5", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2016:CCSa, author = "Ting-Xuan Wang and Wen-Hsiang Lu", title = "Constructing Complex Search Tasks with Coherent Subtask Search Goals", journal = j-TALLIP, volume = "15", number = "2", pages = "6:1--6:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2742547", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Nowadays, due to the explosive growth of web content and usage, users deal with their complex search tasks by web search engines. However, conventional search engines consider a search query corresponding only to a simple search task. In order to accomplish a complex search task, which consists of multiple subtask search goals, users usually have to issue a series of queries. 
For example, the complex search task ``travel to Dubai'' may involve several subtask search goals, including reserving hotel room, surveying Dubai landmarks, booking flights, and so forth. Therefore, a user can efficiently accomplish his or her complex search task if search engines can predict the complex search task with a variety of subtask search goals. In this work, we propose a complex search task model (CSTM) to deal with this problem. The CSTM first groups queries into complex search task clusters, and then generates subtask search goals from each complex search task cluster. To raise the performance of CSTM, we exploit four web resources including community question answering, query logs, search engine result pages, and clicked pages. Experimental results show that our CSTM is effective in identifying the comprehensive subtask search goals of a complex search task.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "6", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tsai:2016:CWB, author = "Richard Tzong-Han Tsai", title = "Collective {Web}-Based Parenthetical Translation Extraction Using {Markov} Logic Networks", journal = j-TALLIP, volume = "15", number = "2", pages = "7:1--7:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2794399", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Parenthetical translations are translations of terms in otherwise monolingual text that appear inside parentheses. Parenthetical translations extraction (PTE) is the task of extracting parenthetical translations from natural language documents. 
One of the main difficulties in PTE is to detect the left boundary of the translated term in preparenthetical text. In this article, we propose a collective approach that employs Markov logic to model multiple constraints used in the PTE task. We show how various constraints can be formulated and combined in a Markov logic network (MLN). Our experimental results show that the proposed collective PTE approach significantly outperforms a current state-of-the-art method, improving the average F-measure up to 27.11\% compared to the previous word alignment approach. It also outperforms an individual MLN-based system by 8.2\% and a system based on conditional random fields by 5.9\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "7", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jain:2016:FHW, author = "Amita Jain and D. K. Lobiyal", title = "Fuzzy {Hindi} {WordNet} and Word Sense Disambiguation Using Fuzzy Graph Connectivity Measures", journal = j-TALLIP, volume = "15", number = "2", pages = "8:1--8:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2790079", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In this article, we propose Fuzzy Hindi WordNet, which is an extended version of Hindi WordNet. The proposed idea of fuzzy relations and their role in modeling Fuzzy Hindi WordNet is explained. We mathematically define fuzzy relations and the composition of these fuzzy relations for this extended version. We show that the concept of composition of fuzzy relations can be used to infer a relation between two words that otherwise are not directly related in Hindi WordNet. 
Then we propose fuzzy graph connectivity measures that include both local and global measures. These measures are used in determining the significance of a concept (which is represented as a vertex in the fuzzy graph) in a specific context. Finally, we show how these extended measures solve the problem of word sense disambiguation (WSD) effectively, which is useful in many natural language processing applications to improve their performance. Experiments on standard sense tagged corpus for WSD show better results when Fuzzy Hindi WordNet is used in place of Hindi WordNet.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "8", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kertkeidkachorn:2016:AFH, author = "Natthawut Kertkeidkachorn and Proadpran Punyabukkana and Atiwong Suchato", title = "Acoustic Features for Hidden Conditional Random Fields-Based {Thai} Tone Classification", journal = j-TALLIP, volume = "15", number = "2", pages = "9:1--9:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2833088", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In the Thai language, tone information is necessary for Thai speech recognition systems. Previous studies show that many acoustic cues are attributed to shapes of tones. Nevertheless, most Thai tone classification studies mainly adopted F$_0$ values and their derivatives without considering other acoustic features. In this article, other acoustic features for Thai tone classification are investigated. 
In the experiment, energy values and spectral information represented by three spectral-based features including the LPC-based feature, PLP-based feature, and MFCC-based feature are applied to the HCRF-based Thai tone classification, which was reported as the best approach for Thai tone classification. The energy values provide an error rate reduction of 22.40\% in the isolated word scenario, while there are slight improvements in the continuous speech scenario. On the contrary, spectral-based features greatly contribute to Thai tone classification in the continuous-speech scenario, whereas spectral-based features slightly degrade performances in the isolated-word scenario. The best achievement in the continuous-speech scenario is obtained from the PLP-based feature, which yields an error rate reduction of 13.90\%. Therefore, findings in this article are that energy values and spectral-based features, especially the PLP-based feature, are the main contributors to the improvement of the performances of Thai tone classification in the isolated-word scenario and the continuous-speech scenario, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "9", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chu:2016:IPS, author = "Chenhui Chu and Toshiaki Nakazawa and Sadao Kurohashi", title = "Integrated Parallel Sentence and Fragment Extraction from Comparable Corpora: a Case Study on {Chinese--Japanese} {Wikipedia}", journal = j-TALLIP, volume = "15", number = "2", pages = "10:1--10:??", month = feb, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2833089", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Parallel corpora are crucial for statistical machine translation (SMT); however, they are quite scarce for most language pairs and domains. As comparable corpora are far more available, many studies have been conducted to extract either parallel sentences or fragments from them for SMT. In this article, we propose an integrated system to extract both parallel sentences and fragments from comparable corpora. We first apply parallel sentence extraction to identify parallel sentences from comparable sentences. We then extract parallel fragments from the comparable sentences. Parallel sentence extraction is based on a parallel sentence candidate filter and classifier for parallel sentence identification. We improve it by proposing a novel filtering strategy and three novel feature sets for classification. Previous studies have found it difficult to accurately extract parallel fragments from comparable sentences. We propose an accurate parallel fragment extraction method that uses an alignment model to locate the parallel fragment candidates and an accurate lexicon-based filter to identify the truly parallel fragments. 
A case study on the Chinese--Japanese Wikipedia indicates that our proposed methods outperform previously proposed methods, and the parallel data extracted by our system significantly improves SMT performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "10", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2016:CCSb, author = "Rui Wang and Masao Utiyama and Isao Goto and Eiichiro Sumita and Hai Zhao and Bao-Liang Lu", title = "Converting Continuous-Space Language Models into {$N$}-gram Language Models with Efficient Bilingual Pruning for Statistical Machine Translation", journal = j-TALLIP, volume = "15", number = "3", pages = "11:1--11:??", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2843942", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The Language Model (LM) is an essential component of Statistical Machine Translation (SMT). In this article, we focus on developing efficient methods for LM construction. Our main contribution is that we propose a Natural $N$-grams based Converting (NNGC) method for transforming a Continuous-Space Language Model (CSLM) to a Back-off $N$-gram Language Model (BNLM). Furthermore, a Bilingual LM Pruning (BLMP) approach is developed for enhancing LMs in SMT decoding and speeding up CSLM converting. The proposed pruning and converting methods can convert a large LM efficiently by working jointly. That is, a LM can be effectively pruned before it is converted from CSLM without sacrificing performance, and further improved if an additional corpus contains out-of-domain information.
For different SMT tasks, our experimental results indicate that the proposed NNGC and BLMP methods outperform the existing counterpart approaches significantly in BLEU and computational cost.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "11", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chakrabarty:2016:BBL, author = "Abhisek Chakrabarty and Utpal Garain", title = "{BenLem} (A {Bengali} Lemmatizer) and Its Role in {WSD}", journal = j-TALLIP, volume = "15", number = "3", pages = "12:1--12:??", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2835494", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "A lemmatization algorithm for Bengali has been developed and evaluated. Its effectiveness for word sense disambiguation (WSD) is also investigated. One of the key challenges for computer processing of highly inflected languages is to deal with the frequent morphological variations of the root words appearing in the text. Therefore, a lemmatizer is essential for developing natural language processing (NLP) tools for such languages. In this experiment, Bengali, which is the national language of Bangladesh and the second most popular language in the Indian subcontinent, has been taken as a reference. In order to design the Bengali lemmatizer (named as BenLem), possible transformations through which surface words are formed from lemmas are studied so that appropriate reverse transformations can be applied on a surface word to get the corresponding lemma back. BenLem is found to be capable of handling both inflectional and derivational morphology in Bengali. 
It is evaluated on a set of 18 news articles taken from the FIRE Bengali News Corpus consisting of 3,342 surface words (excluding proper nouns) and found to be 81.95\% accurate. The role of the lemmatizer is then investigated for Bengali WSD. Ten highly polysemous Bengali words are considered for sense disambiguation. The FIRE corpus and a collection of Tagore's short stories are considered for creating the WSD dataset. Different WSD systems are considered for this experiment, and it is noticed that BenLem improves the performance of all the WSD systems and the improvements are statistically significant.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "12", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhou:2016:ESR, author = "Hao Zhou and Shujian Huang and Junsheng Zhou and Yue Zhang and Huadong Chen and Xinyu Dai and Chuan Cheng and Jiajun Chen", title = "Enhancing Shift--Reduce Constituent Parsing with Action {$N$}-Gram Model", journal = j-TALLIP, volume = "15", number = "3", pages = "13:1--13:??", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2820902", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Current shift-reduce parsers ``understand'' the context by embodying a large number of binary indicator features with a discriminative model. In this article, we propose the action n-gram model, which utilizes the action sequence to help parsing disambiguation. The action n-gram model is trained on action sequences produced by parsers with the n-gram estimation method, which gives a smoothed maximum likelihood estimation of the action probability given a specific action history. 
We show that incorporating action n-gram models into a state-of-the-art parsing framework could achieve parsing accuracy improvements on three datasets across two languages.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "13", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sadek:2016:EAC, author = "Jawad Sadek and Farid Meziane", title = "Extracting {Arabic} Causal Relations Using Linguistic Patterns", journal = j-TALLIP, volume = "15", number = "3", pages = "14:1--14:??", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2800786", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Identifying semantic relations is a crucial step in discourse analysis and is useful for many applications in both language and speech technology. Automatic detection of Causal relations therefore has gained popularity in the literature within different frameworks. The aim of this article is the automatic detection and extraction of Causal relations that are explicitly expressed in Arabic texts. To fulfill this goal, a Pattern Recognizer model was developed to signal the presence of cause--effect information within sentences from nonspecific domain texts. This model incorporates approximately 700 linguistic patterns so that parts of the sentence representing the cause and those representing the effect can be distinguished. The patterns were constructed based on different sets of syntactic features by analyzing a large untagged Arabic corpus. In addition, the model was boosted with three independent algorithms to deal with certain types of grammatical particles that indicate causation. 
With this approach, the proposed model achieved an overall recall of 81\% and a precision of 78\%. Evaluation results revealed that the justification particles play a key role in detecting Causal relations. To the best of our knowledge, no previous studies have been dedicated to dealing with this type of relation in the Arabic language.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "14", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yang:2016:BSR, author = "Haitong Yang and Yu Zhou and Chengqing Zong", title = "Bilingual Semantic Role Labeling Inference via Dual Decomposition", journal = j-TALLIP, volume = "15", number = "3", pages = "15:1--15:??", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2835493", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article focuses on bilingual Semantic Role Labeling (SRL); its goal is to annotate semantic roles on both sides of the parallel bilingual texts (bi-texts). Since rich bilingual information is encoded, bilingual SRL has been applied in many natural-language processing (NLP) tasks such as machine translation (MT), cross-lingual information retrieval (IR), and the like. A feasible way of performing bilingual SRL is using monolingual SRL systems to perform SRL on each side of bi-texts separately. However, it is difficult to obtain consistent SRL results on both sides of bi-texts in this way. Some works have tried to jointly infer bilingual SRL because there are many complementary language cues on both sides of bi-texts and they reported better performance than monolingual systems. However, there are two limits in the existing methods. 
First, the existing methods often require high inference costs due to the complex objective function. Second, the existing methods fully adopt the candidates generated by monolingual SRL systems, but many candidates are discarded in the argument pruning or identification stage of monolingual systems. In this article, we propose two strategies to overcome these limits. We utilize a simple but efficient technique: Dual Decomposition to search for consistent results for both sides of bi-texts. On the other hand, we propose a method called Bi-Directional Projection (BDP) to recover arguments discarded in monolingual SRL systems. We evaluate our method on a standard parallel benchmark: the OntoNotes dataset. The experimental results show that our method yields significant improvements over the state-of-the-art monolingual systems. In addition, our approach is also better and faster than existing methods due to BDP and Dual Decomposition.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "15", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2016:MMC, author = "Maoxi Li and Mingwen Wang and Hanxi Li and Fan Xu", title = "Modeling Monolingual Character Alignment for Automatic Evaluation of {Chinese} Translation", journal = j-TALLIP, volume = "15", number = "3", pages = "16:1--16:??", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2815619", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Automatic evaluation of machine translations is an important task. Most existing evaluation metrics rely on matching the same word or letter $n$-grams.
This strategy leads to poor results on Chinese translations because one has to rely merely on matching identical characters. In this article, we propose a new evaluation metric that allows different characters with the same or similar meaning to match. An Indirect Hidden Markov Model (IHMM) is proposed to align the Chinese translation with human references at the character level. In the model, the emission probabilities are estimated by character similarity, including character semantic similarity and character surface similarity, and transition probabilities are estimated by a heuristic distance-based distortion model. When evaluating the submitted output of English-to-Chinese translation systems in the IWSLT'08 CT-EC and NIST'08 EC tasks, the experimental results indicate that the proposed metric has a significantly better correlation with human evaluation than the state-of-the-art machine translation metrics (i.e., BLEU, Meteor Universal, and TESLA-CELAB). This study shows that it is important to allow different characters to match in the evaluation of Chinese translations and that the IHMM is a reasonable approach for the alignment of Chinese characters.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "16", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Abuaiadah:2016:UBM, author = "Diab Abuaiadah", title = "Using Bisect {$K$}-Means Clustering Technique in the Analysis of {Arabic} Documents", journal = j-TALLIP, volume = "15", number = "3", pages = "17:1--17:??", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2812809", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In this article, I have investigated the performance of the bisect K-means clustering algorithm compared to the standard K-means algorithm in the analysis of Arabic documents. The experiments included five commonly used similarity and distance functions (Pearson correlation coefficient, cosine, Jaccard coefficient, Euclidean distance, and averaged Kullback--Leibler divergence) and three leading stemmers. Using the purity measure, the bisect K-means clearly outperformed the standard K-means in all settings with varying margins. For the bisect K-means, the best purity reached 0.927 when using the Pearson correlation coefficient function, while for the standard K-means, the best purity reached 0.884 when using the Jaccard coefficient function. Removing stop words significantly improved the results of the bisect K-means but produced minor improvements in the results of the standard K-means. Stemming provided additional minor improvement in all settings except the combination of the averaged Kullback--Leibler divergence function and the root-based stemmer, where the purity was deteriorated by more than 10\%. These experiments were conducted using a dataset with nine categories, each of which contains 300 documents.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "17", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Elayeb:2016:ACL, author = "Bilel Elayeb and Ibrahim Bounhas", title = "{Arabic} Cross-Language Information Retrieval: a Review", journal = j-TALLIP, volume = "15", number = "3", pages = "18:1--18:??", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2789210", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Cross-language information retrieval (CLIR) deals with retrieving relevant documents in one language using queries expressed in another language. As CLIR tools rely on translation techniques, they are challenged by the properties of highly derivational and flexional languages like Arabic. Much work has been done on CLIR for different languages including Arabic. In this article, we introduce the reader to the motivations for solving some problems related to Arabic CLIR approaches. The evaluation of these approaches is discussed starting from the 2001 and 2002 TREC Arabic CLIR tracks, which aim to objectively evaluate CLIR systems. We also study many other research works to highlight the unresolved problems or those that require further investigation. These works are discussed in the light of a deep study of the specificities and the tasks of Arabic information retrieval (IR). Particular attention is given to translation techniques and CLIR resources, which are key issues challenging Arabic CLIR. To push research in this field, we discuss how a new standard collection can improve Arabic IR and CLIR tracks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "18", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhao:2016:ALM, author = "Yinggong Zhao and Shujian Huang and Xin-Yu Dai and Jiajun Chen", title = "Adaptation of Language Models for {SMT} Using Neural Networks with Topic Information", journal = j-TALLIP, volume = "15", number = "3", pages = "19:1--19:??", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2816816", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Neural network language models (LMs) are shown to be effective in improving the performance of statistical machine translation (SMT) systems. However, state-of-the-art neural network LMs usually use words before the current position as context and neglect global topic information, which can help machine translation (MT) systems to select better translation candidates from a higher perspective. In this work, we propose improvement of the state-of-the-art feedforward neural language model with topic information. Two main issues need to be tackled when adding topics into neural network LMs for SMT: one is how to incorporate topics to the neural network; the other is how to get target-side topic distribution before translation. We incorporate topics by appending topic distribution to the input layer of a feedforward LM. We adopt a multinomial logistic-regression (MLR) model to predict the target-side topic distribution based on source side information. Moreover, we propose a feedforward neural network model to learn joint representations on the source side for topic prediction. 
LM experiments demonstrate that the perplexity on validation set can be greatly reduced by the topic-enhanced feedforward LM, and the prediction of target-side topics can be improved dramatically with the MLR model equipped with the joint source representations. A final MT experiment, conducted on a large-scale Chinese--English dataset, shows that our feedforward LM with predicted topics improves the translation performance against a strong baseline.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "19", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ding:2016:IIE, author = "Chenchen Ding and Keisuke Sakanushi and Hirona Touji and Mikio Yamamoto", title = "Inter-, Intra-, and Extra-Chunk Pre-Ordering for Statistical {Japanese}-to-{English} Machine Translation", journal = j-TALLIP, volume = "15", number = "3", pages = "20:1--20:??", month = mar, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2818381", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:50 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "A rule-based pre-ordering approach is proposed for statistical Japanese-to-English machine translation using the dependency structure of source-side sentences. A Japanese sentence is pre-ordered to an English-like order at the morpheme level for a statistical machine translation system during the training and decoding phase to resolve the reordering problem. In this article, extra-chunk pre-ordering of morphemes is proposed, which allows Japanese functional morphemes to move across chunk boundaries. This contrasts with the intra-chunk reordering used in previous approaches, which restricts the reordering of morphemes within a chunk. 
Linguistically oriented discussions show that correct pre-ordering cannot be realized without extra-chunk movement of morphemes. The proposed approach is compared with five rule-based pre-ordering approaches designed for Japanese-to-English translation and with a language independent statistical pre-ordering approach on a standard patent dataset and on a news dataset obtained by crawling Internet news sites. Two state-of-the-art statistical machine translation systems, one phrase-based and the other hierarchical phrase-based, are used in experiments. Experimental results show that the proposed approach outperforms the compared approaches on automatic reordering measures (Kendall's $ \tau $, Spearman's $ \rho $, fuzzy reordering score, and test set RIBES) and on the automatic translation precision measure of test set BLEU score.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "20", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lee:2015:ISI, author = "Lung-Hao Lee and Gina-Anne Levow and Shih-Hung Wu and Chao-Lin Liu", title = "Introduction to the Special Issue on {Chinese} Spell Checking", journal = j-TALLIP, volume = "14", number = "4", pages = "14:1--14:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2818354", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:49 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib", note = "Special issue on Chinese spell checking.", abstract = "This special issue contains four articles based on and expanded from systems presented at the SIGHAN-7 Chinese Spelling Check Bakeoff. We provide an overview of the approaches and designs for Chinese spelling checkers presented in these articles. 
We conclude this introductory article with a summary of possible future directions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "14", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2015:PFC, author = "Kuan-Yu Chen and Hsin-Min Wang and Hsin-Hsi Chen", title = "A Probabilistic Framework for {Chinese} Spelling Check", journal = j-TALLIP, volume = "14", number = "4", pages = "15:1--15:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2826234", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:49 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib", note = "Special issue on Chinese spell checking.", abstract = "Chinese spelling check (CSC) is still an unsolved problem today since there are many homonymous or homomorphous characters. Recently, more and more CSC systems have been proposed. To the best of our knowledge, language modeling is one of the major components among these systems because of its simplicity and moderately good predictive power. After deeply analyzing the school of research, we are aware that most of the systems only employ the conventional $n$-gram language models. The contributions of this article are threefold. First, we propose a novel probabilistic framework for CSC, which naturally combines several important components, such as the substitution model and the language model, to inherit their individual merits as well as to overcome their limitations. Second, we incorporate the topic language models into the CSC system in an unsupervised fashion.
The topic language models can capture the long-span semantic information from a word (character) string while the conventional $n$-gram language models can only preserve the local regularity information. Third, we further integrate Web resources with the proposed framework to enhance the overall performance. Our rigorously empirical experiments demonstrate the consistent and utility performance of the proposed framework in the CSC task.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "15", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2015:HRA, author = "Xiaodong Liu and Fei Cheng and Kevin Duh and Yuji Matsumoto", title = "A Hybrid Ranking Approach to {Chinese} Spelling Check", journal = j-TALLIP, volume = "14", number = "4", pages = "16:1--16:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2822264", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:49 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib", note = "Special issue on Chinese spell checking.", abstract = "We propose a novel framework for Chinese Spelling Check (CSC), which is an automatic algorithm to detect and correct Chinese spelling errors. Our framework contains two key components: candidate generation and candidate ranking. Our framework differs from previous research, such as Statistical Machine Translation (SMT) based model or Language Model (LM) based model, in that we use both SMT and LM models as components of our framework for generating the correction candidates, in order to obtain maximum recall; to improve the precision, we further employ a Support Vector Machines (SVM) classifier to rank the candidates generated by the SMT and the LM.
Experiments show that our framework outperforms other systems, which adopted the same or similar resources as ours in the SIGHAN 7 shared task; even comparing with the state-of-the-art systems, which used more resources, such as a considerable large dictionary, an idiom dictionary and other semantic information, our framework still obtains competitive results. Furthermore, to address the resource scarceness problem for training the SMT model, we generate around 2 million artificial training sentences using the Chinese character confusion sets, which include a set of Chinese characters with similar shapes and similar pronunciations, provided by the SIGHAN 7 shared task.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "16", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yeh:2015:CSC, author = "Jui-Feng Yeh and Wen-Yi Chen and Mao-Chuan Su", title = "{Chinese} Spelling Checker Based on an Inverted Index List with a Rescoring Mechanism", journal = j-TALLIP, volume = "14", number = "4", pages = "17:1--17:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2826235", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:49 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib", note = "Special issue on Chinese spell checking.", abstract = "An approach is proposed for Chinese spelling error detection and correction, in which an inverted index list with a rescoring mechanism is used. The inverted index list is a structure for mapping from word to desired sentence, and for representing nodes in lattices constructed through character expansion (according to predefined phonologically and visually similar character sets). 
Pruning based on a contextual dependency confidence measure was used to markedly reduce the search space and computational complexity. Relevant mapping relations between the original input and desired input were obtained using a scoring mechanism composed of class-based language and maximum entropy correction models containing character, word, and contextual features. The proposed method was evaluated using data sets provided by SigHan 7 bakeoff. The experimental results show that the proposed method achieved acceptable performance in terms of recall rate or precision rate in error sentence detection and error location detection, and it outperformed other approaches in error location detection and correction.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "17", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Hsieh:2015:CCS, author = "Yu-Ming Hsieh and Ming-Hong Bai and Shu-Ling Huang and Keh-Jiann Chen", title = "Correcting {Chinese} Spelling Errors with Word Lattice Decoding", journal = j-TALLIP, volume = "14", number = "4", pages = "18:1--18:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2791389", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:49 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib", note = "Special issue on Chinese spell checking.", abstract = "Chinese spell checkers are more difficult to develop because of two language features: (1) there are no word boundaries, and a character may function as a word or a word morpheme; and (2) the Chinese character set contains more than ten thousand characters. 
The former makes it difficult for a spell checker to detect spelling errors, and the latter makes it difficult for a spell checker to construct error models. We develop a word lattice decoding model for a Chinese spell checker that addresses these difficulties. The model performs word segmentation and error correction simultaneously, thereby solving the word boundary problem. The model corrects nonword errors as well as real-word errors. In order to better estimate the error distribution of large character sets for error models, we also propose a methodology to extract spelling error samples automatically from the Google web 1T corpus. Due to the large quantity of data in the Google web 1T corpus, many spelling error samples can be extracted, better reflecting spelling error distributions in the real world. Finally, in order to improve the spell checker for real applications, we produce $n$-best suggestions for spelling error corrections. We test our proposed approach with the Bakeoff 2013 CSC Datasets; the results show that the proposed methods with the error model significantly outperform the performance of Chinese spell checkers that do not use error models.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "18", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Anonymous:2015:TPE, author = "Anonymous", title = "{TALLIP} Perspectives: Editorial Commentary: The State of the Journal", journal = j-TALLIP, volume = "14", number = "4", pages = "19:1--19:??", month = oct, year = "2015", CODEN = "????", DOI = "https://doi.org/10.1145/2823512", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:49 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", note = "Special issue on Chinese spell checking.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Asian Low-Resour. Lang. Inf. Process.", articleno = "19", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Hakro:2016:PTI, author = "Dil Nawaz Hakro and Abdullah Zawawi Talib", title = "Printed Text Image Database for {Sindhi} {OCR}", journal = j-TALLIP, volume = "15", number = "4", pages = "21:1--21:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2846093", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Document Image Understanding (DIU) and Electronic Document Management are active fields of research involving image understanding, interpretation, efficient handling, and routing of documents as well as their retrieval. Research on most of the noncursive scripts (Latin) has matured, whereas research on the cursive (connected) scripts is still moving toward perfection. Many researchers are currently working on the cursive scripts (Arabic and other scripts adopting it) around the world so that the difficulties and challenges in document understanding and handling of these scripts can be overcome. Sindhi script has the largest extension of the original Arabic alphabet among languages adopting the Arabic script; it contains 52 characters, compared to 28 characters in the original Arabic alphabet, in order to accommodate more sounds for the language. There are 24 differentiating characters with some possessing four dots. For Sindhi OCR research and development, a database is needed for training and testing of Sindhi text images. We have developed a large database containing over 4 billion words and 15 billion characters in 150 various fonts in four font weights and four styles. The database contents were collected from various sources including websites, books, and theses. 
A custom-built application was also developed to create a text image from a text document that supports various fonts and sizes. The database considers words, characters, characters with spaces, and lines. The database is freely available as a partial or full database by sending an email to one of the authors.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "21", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ding:2016:WSB, author = "Chenchen Ding and Ye Kyaw Thu and Masao Utiyama and Eiichiro Sumita", title = "Word Segmentation for {Burmese} ({Myanmar})", journal = j-TALLIP, volume = "15", number = "4", pages = "22:1--22:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2846095", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Experiments on various word segmentation approaches for the Burmese language are conducted and discussed in this note. Specifically, dictionary-based, statistical, and machine learning approaches are tested. Experimental results demonstrate that statistical and machine learning approaches perform significantly better than dictionary-based approaches. We believe that this note, based on an annotated corpus of relatively considerable size (containing approximately a half million words), is the first systematic comparison of word segmentation approaches for Burmese. This work aims to discover the properties and proper approaches to Burmese textual processing and to promote further researches on this understudied language.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "22", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2016:ITP, author = "Tongtao Zhang and Aritra Chowdhury and Nimit Dhulekar and Jinjing Xia and Kevin Knight and Heng Ji and B{\"u}lent Yener and Liming Zhao", title = "From Image to Translation: Processing the Endangered {Nyushu} Script", journal = j-TALLIP, volume = "15", number = "4", pages = "23:1--23:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2857052", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The lack of computational support has significantly slowed down automatic understanding of endangered languages. In this paper, we take Nyushu (simplified Chinese: [Chinese characters]; literally: ``women's writing'') as a case study to present the first computational approach that combines Computer Vision and Natural Language Processing techniques to deeply understand an endangered language. We developed an end-to-end system to read a scanned hand-written Nyushu article, segment it into characters, link them to standard characters, and then translate the article into Mandarin Chinese. We propose several novel methods to address the new challenges introduced by noisy input and low resources, including Nyushu-specific feature selection for character segmentation and linking, and character linking lattice based Machine Translation. The end-to-end system performance indicates that the system is a promising approach and can serve as a standard benchmark.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "23", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sarigil:2016:SPW, author = "Erdem Sarigil and Oguz Yilmaz and Ismail Sengor Altingovde and Rifat Ozcan and {\"O}zg{\"u}r Ulusoy", title = "A {``Suggested''} Picture of {Web} Search in {Turkish}", journal = j-TALLIP, volume = "15", number = "4", pages = "24:1--24:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2891105", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Although query log analysis provides crucial insights about Web users' search interests, conducting such analyses is almost impossible for some languages, as large-scale and public query logs are quite scarce. In this study, we first survey the existing query collections in Turkish and discuss their limitations. Next, we adopt a novel strategy to obtain a set of Turkish queries using the query autocompletion services from the four major search engines and provide the first large-scale analysis of Web queries and their results in Turkish.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "24", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Goswami:2016:CPG, author = "Mukesh M. Goswami and Suman K.
Mitra", title = "Classification of Printed {Gujarati} Characters Using Low-Level Stroke Features", journal = j-TALLIP, volume = "15", number = "4", pages = "25:1--25:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2856105", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article presents an elegant technique for extracting the low-level stroke features, such as endpoints, junction points, line elements, and curve elements, from offline printed text using a template matching approach. The proposed features are used to classify a subset of characters from Gujarati script. The database consists of approximately 16,782 samples of 42 middle-zone symbols from the Gujarati character set collected from three different sources: machine printed books, newspapers, and laser printed documents. The purpose of this division is to add variety in terms of size, font type, style, ink variation, and boundary deformation. The experiments are performed on the database using a k-nearest neighbor (kNN) classifier and results are compared with other widely used structural features, namely Chain Codes (CC), Directional Element Features (DEF), and Histogram of Oriented Gradients (HoG). The results show that the features are quite robust against the variations and give comparable performance with other existing works.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "25", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Choudhary:2016:FTA, author = "Prakash Choudhary and Neeta Nain", title = "A Four-Tier Annotated {Urdu} Handwritten Text Image Dataset for Multidisciplinary Research on {Urdu} Script", journal = j-TALLIP, volume = "15", number = "4", pages = "26:1--26:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2857053", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article introduces a large handwritten text document image corpus dataset for Urdu script named CALAM (Cursive And Language Adaptive Methodologies). The database contains unconstrained handwritten sentences along with their structural annotations for the offline handwritten text images with their XML representation. Urdu is the fourth most frequently used language in the world, but due to its complex cursive writing script and low resources, it is still a thrust area for document image analysis. Here, a unified approach is applied in the development of an Urdu corpus by collecting printed texts, handwritten texts, and demographic information of writers on a single form. CALAM contains 1,200 handwritten text images, 3,043 lines, 46,664 words, and 101,181 ligatures. For capturing maximum variance among the words and handwritten styles, data collection is distributed among six categories and 14 subcategories. Handwritten forms were filled out by 725 different writers belonging to different geographical regions, ages, and genders with diverse educational backgrounds. 
A structure has been designed to annotate handwritten Urdu script images at line, word, and ligature levels with an XML standard to provide a ground truth of each image at different levels of annotation. This corpus would be very useful for linguistic research in benchmarking and providing a testbed for evaluation of handwritten text recognition techniques for Urdu script, signature verification, writer identification, digital forensics, classification of printed and handwritten text, categorization of texts as per use, and so on. The experimental results of some recently developed handwritten text line segmentation techniques experimented on the proposed dataset are also presented in the article for asserting its viability and usability.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "26", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Norimatsu:2016:FCL, author = "Jun-Ya Norimatsu and Makoto Yasuhara and Toru Tanaka and Mikio Yamamoto", title = "A Fast and Compact Language Model Implementation Using Double-Array Structures", journal = j-TALLIP, volume = "15", number = "4", pages = "27:1--27:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2873068", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The language model is a widely used component in fields such as natural language processing, automatic speech recognition, and optical character recognition. In particular, statistical machine translation uses language models, and the translation speed and the amount of memory required are greatly affected by the performance of the language model implementation. 
We propose a fast and compact implementation of $n$-gram language models that increases query speed and reduces memory usage by using a double-array structure, which is known to be a fast and compact trie data structure. We propose two types of implementation: one for backward suffix trees and the other for reverse tries. The data structure is optimized for space efficiency by embedding model parameters into otherwise unused spaces in the double-array structure. We show that the reverse trie version of our method is among the smallest state-of-the-art implementations in terms of model size with almost the same speed as the implementation that performs fastest on perplexity calculation tasks. Similarly, we achieve faster decoding while keeping compact model sizes, and we confirm that our method can utilize the efficiency of the double-array structure to achieve a balance between speed and size on translation tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "27", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yang:2016:LGF, author = "Haitong Yang and Chengqing Zong", title = "Learning Generalized Features for Semantic Role Labeling", journal = j-TALLIP, volume = "15", number = "4", pages = "28:1--28:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2890496", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article makes an effort to improve Semantic Role Labeling (SRL) through learning generalized features. The SRL task is usually treated as a supervised problem. Therefore, a huge set of features are crucial to the performance of SRL systems.
But these features often lack generalization powers when predicting an unseen argument. This article proposes a simple approach to relieve the issue. A strong intuition is that arguments occurring in similar syntactic positions are likely to bear the same semantic role, and, analogously, arguments that are lexically similar are likely to represent the same semantic role. Therefore, it will be informative to SRL if syntactic or lexical similar arguments can activate the same feature. Inspired by this, we embed the information of lexicalization and syntax into a feature vector for each argument and then use $K$-means to make clustering for all feature vectors of training set. For an unseen argument to be predicted, it will belong to the same cluster as its similar arguments of training set. Therefore, the clusters can be thought of as a kind of generalized feature. We evaluate our method on several benchmarks. The experimental results show that our approach can significantly improve the SRL performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "28", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bhowmik:2016:BHC, author = "Tapan Kumar Bhowmik and Swapan Kumar Parui and Utpal Roy and Lambert Schomaker", title = "{Bangla} Handwritten Character Segmentation Using Structural Features: a Supervised and Bootstrapping Approach", journal = j-TALLIP, volume = "15", number = "4", pages = "29:1--29:??", month = jun, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2890497", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In this article, we propose a new framework for segmentation of Bangla handwritten word images into meaningful individual symbols or pseudo-characters. Existing segmentation algorithms are not usually treated as a classification problem. However, in the present study, the segmentation algorithm is looked upon as a two-class supervised classification problem. The method employs an SVM classifier to select the segmentation points on the word image on the basis of various structural features. For training of the SVM classifier, an unannotated training set is prepared first using candidate segmenting points. The training set is then clustered, and each cluster is labeled manually with minimal manual intervention. A semi-automatic bootstrapping technique is also employed to enlarge the training set from new samples. The overall architecture describes a basic step toward building an annotation system for the segmentation problem, which has not so far been investigated. The experimental results show that our segmentation method is quite efficient in segmenting not only word images but also handwritten texts. As a part of this work, a database of Bangla handwritten word images has also been developed. 
Considering our data collection method and a statistical analysis of our lexicon set, we claim that the relevant characteristics of an ideal lexicon set are present in our handwritten word image database.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "29", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Singh:2016:OHG, author = "Sukhdeep Singh and Anuj Sharma and Indu Chhabra", title = "Online Handwritten {Gurmukhi} Strokes Dataset Based on Minimal Set of Words", journal = j-TALLIP, volume = "16", number = "1", pages = "1:1--1:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2896318", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The online handwriting data are an integral part of data analysis and classification research, as collected handwritten data offers many challenges to group handwritten stroke classes. The present work has been done for grouping handwritten strokes from the Indic script Gurmukhi. Gurmukhi is the script of the popular and widely spoken language Punjabi. The present work includes development of the dataset of Gurmukhi words in the context of online handwriting recognition for real-life use applications, such as maps navigation. We have collected the data of 100 writers from the largest cities in the Punjab region. The writers' variations, such as writing skill level (beginner, moderate, and expert), gender, right or left handedness, and their adaptability to digital handwriting, have been considered in dataset development. We have introduced a novel technique to form handwritten stroke classes based on a limited set of words. 
The presence of all alphabets including vowels of Gurmukhi script has been considered before selection of a word. The developed dataset includes 39,411 strokes from handwritten words and forms 72 classes of strokes after using a k-means clustering technique and manual verification through expert and moderate writers. We have achieved recognition results using the Hidden Markov Model as 87.10\%, 85.43\%, and 84.33\% for middle zone strokes when using training data as 66\%, 50\%, and 80\% of the developed dataset. The present work is a step in a direction to find groups for unknown handwriting strokes with reasonably higher levels of accuracy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "1", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{El-Fiqi:2016:PCC, author = "Heba El-Fiqi and Eleni Petraki and Hussein A. Abbass", title = "Pairwise Comparative Classification for Translator Stylometric Analysis", journal = j-TALLIP, volume = "16", number = "1", pages = "2:1--2:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2898997", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In this article, we present a new type of classification problem, which we call Comparative Classification Problem (CCP), where we use the term data record to refer to a block of instances. Given a single data record with n instances for n classes, the CCP problem is to map each instance to a unique class. This problem occurs in a wide range of applications where the independent and identically distributed assumption is broken down. 
The primary difference between CCP and classical classification is that in the latter, the assignment of a translator to one record is independent of the assignment of a translator to a different record. In CCP, however, the assignment of a translator to one record within a block excludes this translator from further assignments to any other record in that block. The interdependency in the data poses challenges for techniques relying on the independent and identically distributed (iid) assumption. In the Pairwise CCP (PWCCP), a pair of records is grouped together. The key difference between PWCCP and classical binary classification problems is that hidden patterns can only be unmasked by comparing the instances as pairs. In this article, we introduce a new algorithm, PWC4.5, which is based on C4.5, to manage PWCCP. We first show that a simple transformation---that we call Gradient-Based Transformation (GBT)---can fix the problem of iid in C4.5. We then evaluate PWC4.5 using two real-world corpora to distinguish between translators on Arabic-English and French-English translations. While the traditional C4.5 failed to distinguish between different translators, GBT demonstrated better performance. Meanwhile, PWC4.5 consistently provided the best results over C4.5 and GBT.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "2", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Qiao:2016:IUD, author = "Xiuming Qiao and Hailong Cao and Tiejun Zhao", title = "Improving Unsupervised Dependency Parsing with Knowledge from Query Logs", journal = j-TALLIP, volume = "16", number = "1", pages = "3:1--3:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2903720", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Unsupervised dependency parsing becomes more and more popular in recent years because it does not need expensive annotations, such as treebanks, which are required for supervised and semi-supervised dependency parsing. However, its accuracy is still far below that of supervised dependency parsers, partly due to the fact that their parsing model is insufficient to capture linguistic phenomena underlying texts. The performance for unsupervised dependency parsing can be improved by mining knowledge from the texts and by incorporating it into the model. In this article, syntactic knowledge is acquired from query logs to help estimate better probabilities in dependency models with valence. The proposed method is language independent and obtains an improvement of 4.1\% unlabeled accuracy on the Penn Chinese Treebank by utilizing additional dependency relations from the Sogou query logs and Baidu query logs. Moreover, experiments show that the proposed model achieves improvements of 8.07\% on CoNLL 2007 English using the AOL query logs. We believe query logs are useful sources of syntactic knowledge for many natural language processing (NLP) tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "3", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Passban:2016:BNP, author = "Peyman Passban and Qun Liu and Andy Way", title = "Boosting Neural {POS} Tagger for {Farsi} Using Morphological Information", journal = j-TALLIP, volume = "16", number = "1", pages = "4:1--4:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2934676", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Farsi (Persian) is a low-resource language that suffers from the data sparsity problem and a lack of efficient processing tools. Due to their broad application in natural language processing tasks, part-of-speech (POS) taggers are one of those important tools that should be considered in this respect. Despite recent work on Farsi tagging, there is still room for improvement. The best reported accuracy so far is 96\%, which in special cases can rise to 96.9\%. The main problem with existing taggers is their inefficiency in coping with out-of-vocabulary (OOV) words. Addressing both problems of accuracy and OOV words, we developed a neural network-based POS tagger (NPT) that performs efficiently on Farsi. Despite using less data, NPT provides better results in comparison to state-of-the-art systems. Our proposed tagger performs with an accuracy of 97.4\%, with performance highly influenced by morphological features. We carry out a shallow morphological analysis and show considerable improvement over the baseline configuration.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "4", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2016:SBM, author = "Liangliang Liu and Cungen Cao", title = "A Seed-Based Method for Generating {Chinese} Confusion Sets", journal = j-TALLIP, volume = "16", number = "1", pages = "5:1--5:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2933396", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In natural language, people often misuse a word (called a ``confused word'') in place of other words (called ``confusing words''). In misspelling corrections, many approaches to finding and correcting misspelling errors are based on a simple notion called a ``confusion set.'' The confusion set of a confused word consists of confusing words. In this article, we propose a new method of building Chinese character confusion sets. Our method is composed of two major phases. In the first phase, we build a list of seed confusion sets for each Chinese character, which is based on measuring similarity in character pinyin or similarity in character shape. In this phase, all confusion sets are constructed manually, and the confusion sets are organized into a graph, called a ``seed confusion graph'' (SCG), in which vertices denote characters and edges are pairs of characters in the form (confused character, confusing character). In the second phase, we extend the SCG by acquiring more pairs of (confused character, confusing character) from a large Chinese corpus. For this, we use several word patterns (or patterns) to generate new confusion pairs and then verify the pairs before adding them into a SCG. Comprehensive experiments show that our method of extending confusion sets is effective. 
Also, we shall use the confusion sets in Chinese misspelling corrections to show the utility of our method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "5", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2016:ISP, author = "Junhui Li and Muhua Zhu and Wei Lu and Guodong Zhou", title = "Improving Semantic Parsing with Enriched Synchronous Context-Free Grammars in Statistical Machine Translation", journal = j-TALLIP, volume = "16", number = "1", pages = "6:1--6:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2963099", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Semantic parsing maps a sentence in natural language into a structured meaning representation. Previous studies show that semantic parsing with synchronous context-free grammars (SCFGs) achieves favorable performance over most other alternatives. Motivated by the observation that the performance of semantic parsing with SCFGs is closely tied to the translation rules, this article explores to extend translation rules with high quality and increased coverage in three ways. First, we examine the difference between word alignments for semantic parsing and statistical machine translation (SMT) to better adapt word alignment in SMT to semantic parsing. Second, we introduce both structure and syntax informed nonterminals, better guiding the parsing in favor of well-formed structure, instead of using an uninformed nonterminal in SCFGs. Third, we address the unknown word translation issue via synthetic translation rules. Last but not least, we use a filtering approach to improve performance via predicting answer type. 
Evaluation on the standard GeoQuery benchmark dataset shows that our approach greatly outperforms the state of the art across various languages, including English, Chinese, Thai, German, and Greek.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "6", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Krishnamurthi:2016:UDS, author = "Karthik Krishnamurthi and Vijayapal Reddy Panuganti and Vishnu Vardhan Bulusu", title = "Understanding Document Semantics from Summaries: a Case Study on {Hindi} Texts", journal = j-TALLIP, volume = "16", number = "1", pages = "7:1--7:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2956236", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:51 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Summary of a document contains words that actually contribute to the semantics of the document. Latent Semantic Analysis (LSA) is a mathematical model that is used to understand document semantics by deriving a semantic structure based on patterns of word correlations in the document. When using LSA to capture semantics from summaries, it is observed that LSA performs quite well despite being completely independent of any external sources of semantics. However, LSA can be remodeled to enhance its capability to analyze correlations within texts. By taking advantage of the model being language independent, this article presents two stages of LSA remodeling to understand document semantics in the Indian context, specifically from Hindi text summaries. One stage of remodeling is done by providing supplementary information, such as document category and domain information. The second stage of remodeling is done by using a supervised term weighting measure in the process. 
The remodeled LSA's performance is empirically evaluated in a document classification application by comparing the accuracies of classification to plain LSA. An improvement in the performance of LSA in the range of 4.7\% to 6.2\% is achieved from the remodel when compared to the plain model. The results suggest that summaries of documents efficiently capture the semantic structure of documents and are an alternative to full-length documents for understanding document semantics.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "7", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tursun:2016:STT, author = "Eziz Tursun and Debasis Ganguly and Turghun Osman and Ya-Ting Yang and Ghalip Abdukerim and Jun-Lin Zhou and Qun Liu", title = "A Semisupervised Tag-Transition-Based {Markovian} Model for {Uyghur} Morphology Analysis", journal = j-TALLIP, volume = "16", number = "2", pages = "8:1--8:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2968410", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Morphological analysis, which includes analysis of part-of-speech (POS) tagging, stemming, and morpheme segmentation, is one of the key components in natural language processing (NLP), particularly for agglutinative languages. In this article, we investigate the morphological analysis of the Uyghur language, which is the native language of the people in the Xinjiang Uyghur autonomous region of western China. 
Morphological analysis of Uyghur is challenging primarily because of factors such as (1) ambiguities arising due to the likelihood of association of a multiple number of POS tags with a word stem or a multiple number of functional tags with a word suffix, (2) ambiguous morpheme boundaries, and (3) complex morphophonology of the language. Further, the unavailability of a manually annotated training set in the Uyghur language for the purpose of word segmentation makes Uyghur morphological analysis more difficult. In our proposed work, we address these challenges by undertaking a semisupervised approach of learning a Markov model with the help of a manually constructed dictionary of ``suffix to tag'' mappings in order to predict the most likely tag transitions in the Uyghur morpheme sequence. Due to the linguistic characteristics of Uyghur, we incorporate a prior belief in our model for favoring word segmentations with a lower number of morpheme units. Empirical evaluation of our proposed model shows an accuracy of about 82\%. We further improve the effectiveness of the tag transition model with an active learning paradigm. In particular, we manually investigated a subset of words for which the model prediction ambiguity was within the top 20\%. Manually incorporating rules to handle these erroneous cases resulted in an overall accuracy of 93.81\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "8", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nguyen:2016:ACN, author = "Long H. B. 
Nguyen and Dien Dinh and Phuoc Tran", title = "An Approach to Construct a Named Entity Annotated {English--Vietnamese} Bilingual Corpus", journal = j-TALLIP, volume = "16", number = "2", pages = "9:1--9:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2990191", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Manually constructing an annotated Named Entity (NE) in a bilingual corpus is a time-consuming, labor-intensive, and expensive process, but this is necessary for natural language processing (NLP) tasks such as cross-lingual information retrieval, cross-lingual information extraction, machine translation, etc. In this article, we present an automatic approach to construct an annotated NE in English-Vietnamese bilingual corpus from a bilingual parallel corpus by proposing an aligned NE method. Basing this corpus on a bilingual corpus in which the initial NEs are extracted from its own language separately, the approach tries to correct unrecognized NEs or incorrectly recognized NEs before aligning the NEs by using a variety of bilingual constraints. The generated corpus not only improves the NE recognition results but also creates alignments between English NEs and Vietnamese NEs, which are necessary for training NE translation models. The experimental results show that the approach outperforms the baseline methods effectively. In the English-Vietnamese NE alignment task, the F-measure increases from 68.58\% to 79.77\%. Thanks to the improvement of the NE recognition quality, the proposed method also increases significantly: the F-measure goes from 84.85\% to 88.66\% for the English side and from 75.71\% to 85.55\% for the Vietnamese side. 
By providing the additional semantic information for the machine translation systems, the BLEU score increases from 33.04\% to 45.11\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "9", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chou:2016:BWN, author = "Chien-Lung Chou and Chia-Hui Chang and Ya-Yun Huang", title = "Boosted {Web} Named Entity Recognition via Tri-Training", journal = j-TALLIP, volume = "16", number = "2", pages = "10:1--10:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2963100", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Named entity extraction is a fundamental task for many natural language processing applications on the web. Existing studies rely on annotated training data, which is quite expensive to obtain large datasets, limiting the effectiveness of recognition. In this research, we propose a semisupervised learning approach for web named entity recognition (NER) model construction via automatic labeling and tri-training. The former utilizes structured resources containing known named entities for automatic labeling, while the latter makes use of unlabeled examples to improve the extraction performance. Since this automatically labeled training data may contain noise, a self-testing procedure is used as a follow-up to remove low-confidence annotation and prepare higher-quality training data. Furthermore, we modify tri-training for sequence labeling and derive a proper initialization for large dataset training to improve entity recognition. Finally, we apply this semisupervised learning framework for person name recognition, business organization name recognition, and location name extraction. 
In the task of Chinese NER, an F-measure of 0.911, 0.849, and 0.845 can be achieved, for person, business organization, and location NER, respectively. The same framework is also applied for English and Japanese business organization name recognition and obtains models with performance of a 0.832 and 0.803 F-measure.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "10", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sadek:2016:DBA, author = "Jawad Sadek and Farid Meziane", title = "A Discourse-Based Approach for {Arabic} Question Answering", journal = j-TALLIP, volume = "16", number = "2", pages = "11:1--11:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2988238", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The treatment of complex questions with explanatory answers involves searching for arguments in texts. Because of the prominent role that discourse relations play in reflecting text producers' intentions, capturing the underlying structure of text constitutes a good instructor in this issue. From our extensive review, a system for automatic discourse analysis that creates full rhetorical structures in large-scale Arabic texts is currently unavailable. This is due to the high computational complexity involved in processing a large number of hypothesized relations associated with large texts. Therefore, more practical approaches should be investigated. This article presents a new Arabic Text Parser oriented for question-answering systems dealing with [Arabic characters] ``why'' and [Arabic characters] ``how to'' questions. 
The Text Parser presented here considers the sentence as the basic unit of text and incorporates a set of heuristics to avoid computational explosion. With this approach, the developed question-answering system reached a significant improvement over the baseline with a Recall of 68\% and MRR of 0.62.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "11", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tran:2016:WRS, author = "Phuoc Tran and Dien Dinh and Long H. B. Nguyen", title = "Word Re-Segmentation in {Chinese--Vietnamese} Machine Translation", journal = j-TALLIP, volume = "16", number = "2", pages = "12:1--12:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2988237", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In isolated languages, such as Chinese and Vietnamese, words are not separated by spaces, and a word may be formed by one or more syllables. Therefore, word segmentation (WS) is usually the first process that is implemented in the machine translation process. WS in the source and target languages is based on different training corpora, and WS approaches may not be the same. Therefore, the WS that results in these two languages are not often homologous, and thus word alignment results in many 1-n and n-1 alignment pairs in statistical machine translation, which degrades the performance of machine translation. In this article, we will adjust the WS for both Chinese and Vietnamese in particular and for isolated language pairs in general and make the word boundary of the two languages more symmetric in order to strengthen 1-1 alignments and enhance machine translation performance. 
We have tested this method on the Computational Linguistics Center's corpus, which consists of 35,623 sentence pairs. The experimental results show that our method has significantly improved the performance of machine translation compared to the baseline translation system, WS translation system, and anchor language-based WS translation systems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "12", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2016:MSC, author = "Peifeng Li and Guodong Zhou and Qiaoming Zhu", title = "Minimally Supervised {Chinese} Event Extraction from Multiple Views", journal = j-TALLIP, volume = "16", number = "2", pages = "13:1--13:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2994600", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Although several semi-supervised learning models have been proposed for English event extraction, there are few successful stories in Chinese due to its special characteristics. In this article, we propose a novel minimally supervised model for Chinese event extraction from multiple views. Besides the traditional pattern similarity view (PSV), a semantic relationship view (SRV) is introduced to capture the relevant event mentions from relevant documents. Moreover, a morphological structure view (MSV) is incorporated to both infer more positive patterns and help filter negative patterns via morphological structure similarity. An evaluation of the ACE 2005 Chinese corpus shows that our minimally supervised model significantly outperforms several strong baselines.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "13", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Atreya:2016:QER, author = "Arjun {Atreya V} and Ashish Kankaria and Pushpak Bhattacharyya and Ganesh Ramakrishnan", title = "Query Expansion in Resource-Scarce Languages: a Multilingual Framework Utilizing Document Structure", journal = j-TALLIP, volume = "16", number = "2", pages = "14:1--14:??", month = dec, year = "2016", CODEN = "????", DOI = "https://doi.org/10.1145/2997643", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Retrievals in response to queries to search engines in resource-scarce languages often produce no results, which annoys the user. In such cases, at least partially relevant documents must be retrieved. We propose a novel multilingual framework, MultiStructPRF, which expands the query with related terms by (i) using a resource-rich assisting language and (ii) giving varied importance to the expansion terms depending on their position of occurrence in the document. Our system uses the help of an assisting language to expand the query in order to improve system recall. We propose a systematic expansion model for weighting the expansion terms coming from different parts of the document. To combine the expansion terms from query language and assisting language, we propose a heuristics-based fusion model. Our experimental results show an improvement over other PRF techniques in both precision and recall for multiple resource-scarce languages like Marathi, Bengali, Odia, Finnish, and the like. We study the effect of different assisting languages on precision and recall for multiple query languages. 
Our experiments reveal an interesting fact: Precision is positively correlated with the typological closeness of query language and assisting language, whereas recall is positively correlated with the resource richness of the assisting language.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "14", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Finch:2017:IBL, author = "Andrew Finch and Taisuke Harada and Kumiko Tanaka-Ishii and Eiichiro Sumita", title = "Inducing a Bilingual Lexicon from Short Parallel Multiword Sequences", journal = j-TALLIP, volume = "16", number = "3", pages = "15:1--15:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3003726", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article proposes a technique for mining bilingual lexicons from pairs of parallel short word sequences. The technique builds a generative model from a corpus of training data consisting of such pairs. The model is a hierarchical nonparametric Bayesian model that directly induces a bilingual lexicon while training. The model learns in an unsupervised manner and is designed to exploit characteristics of the language pairs being mined. The proposed model is capable of utilizing commonly used word-pair frequency information and additionally can employ the internal character alignments within the words themselves. It is thereby capable of mining transliterations and can use reliably aligned transliteration pairs to support the mining of other words in their context. 
The model is also capable of performing word reordering and word deletion during the alignment process, and it is furthermore capable of operating in the absence of full segmentation information. In this work, we study two mining tasks based on English--Japanese and English--Chinese language pairs, and compare the proposed approach to baselines based on simpler models that use only word-pair frequency information. Our results show that the proposed method is able to mine bilingual word pairs at higher levels of precision and recall than the baselines.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "15", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2017:CSC, author = "Shaonan Wang and Chengqing Zong", title = "Comparison Study on Critical Components in Composition Model for Phrase Representation", journal = j-TALLIP, volume = "16", number = "3", pages = "16:1--16:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3010088", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Phrase representation, an important step in many NLP tasks, involves representing phrases as continuous-valued vectors. This article presents detailed comparisons concerning the effects of word vectors, training data, and the composition and objective function used in a composition model for phrase representation. Specifically, we first discuss how the augmented word representations affect the performance of the composition model. Then, we investigate whether different types of training data influence the performance of the composition model and, if so, how they influence it. 
Finally, we evaluate combinations of different composition and objective functions and discuss the factors related to composition model performance. All evaluations were conducted in both English and Chinese. Our main findings are as follows: (1) The Additive model with semantic enhanced word vectors performs comparably to the state-of-the-art model; (2) The Additive model which updates augmented word vectors and the Matrix model with semantic enhanced word vectors systematically outperforms the state-of-the-art model in bigram and multi-word phrase similarity task, respectively; (3) Representing the high frequency phrases by estimating their surrounding contexts is a good training objective for bigram phrase similarity tasks; and (4) The performance gain of composition model with semantic enhanced word vectors is due to the composition function and the greater weight attached to important words. Previous works focus on the composition function; however, our findings indicate that other components in the composition model (especially word representation) make a critical difference in phrase representation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "16", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bhat:2017:ITB, author = "Riyaz Ahmad Bhat and Irshad Ahmad Bhat and Dipti Misra Sharma", title = "Improving Transition-Based Dependency Parsing of {Hindi} and {Urdu} by Modeling Syntactically Relevant Phenomena", journal = j-TALLIP, volume = "16", number = "3", pages = "17:1--17:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3005447", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In recent years, transition-based parsers have shown promise in terms of efficiency and accuracy. Though these parsers have been extensively explored for multiple Indian languages, there is still considerable scope for improvement by properly incorporating syntactically relevant information. In this article, we enhance transition-based parsing of Hindi and Urdu by redefining the features and feature extraction procedures that have been previously proposed in the parsing literature of Indian languages. We propose and empirically show that properly incorporating syntactically relevant information like case marking, complex predication and grammatical agreement in an arc-eager parsing model can significantly improve parsing accuracy. Our experiments show an absolute improvement of $ \approx 2 $ \% LAS for parsing of both Hindi and Urdu over a competitive baseline which uses rich features like part-of-speech (POS) tags, chunk tags, cluster ids and lemmas. We also propose some heuristics to identify ezafe constructions in Urdu texts which show promising results in parsing these constructions.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "17", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Das:2017:NER, author = "Arjun Das and Debasis Ganguly and Utpal Garain", title = "Named Entity Recognition with Word Embeddings and {Wikipedia} Categories for a Low-Resource Language", journal = j-TALLIP, volume = "16", number = "3", pages = "18:1--18:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3015467", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In this article, we propose a word embedding--based named entity recognition (NER) approach. NER is commonly approached as a sequence labeling task with the application of methods such as conditional random field (CRF). However, for low-resource languages without the presence of sufficiently large training data, methods such as CRF do not perform well. In our work, we make use of the proximity of the vector embeddings of words to approach the NER problem. The hypothesis is that word vectors belonging to the same name category, such as a person's name, occur in close vicinity in the abstract vector space of the embedded words. Assuming that this clustering hypothesis is true, we apply a standard classification approach on the vectors of words to learn a decision boundary between the NER classes. Our NER experiments are conducted on a morphologically rich and low-resource language, namely Bengali. Our approach significantly outperforms standard baseline CRF approaches that use cluster labels of word embeddings and gazetteers constructed from Wikipedia. Further, we propose an unsupervised approach (that uses an automatically created named entity (NE) gazetteer from Wikipedia in the absence of training data). 
For a low-resource language, the word vectors obtained from Wikipedia are not sufficient to train a classifier. As a result, we propose to make use of the distance measure between the vector embeddings of words to expand the set of Wikipedia training examples with additional NEs extracted from a monolingual corpus that yield significant improvement in the unsupervised NER performance. In fact, our expansion method performs better than the traditional CRF-based (supervised) approach (i.e., F-score of 65.4\% vs. 64.2\%). Finally, we compare our proposed approach to the official submission for the IJCNLP-2008 Bengali NER shared task and achieve an overall improvement of F-score 11.26\% with respect to the best official system.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "18", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2017:IDR, author = "Haoran Li and Jiajun Zhang and Chengqing Zong", title = "Implicit Discourse Relation Recognition for {English} and {Chinese} with Multiview Modeling and Effective Representation Learning", journal = j-TALLIP, volume = "16", number = "3", pages = "19:1--19:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3028772", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Discourse relations between two text segments play an important role in many Natural Language Processing (NLP) tasks. The connectives strongly indicate the sense of discourse relations, while in fact, there are no connectives in a large proportion of discourse relations, that is, implicit discourse relations. Compared with explicit relations, implicit relations are much harder to detect and have drawn significant attention. 
Until now, there have been many studies focusing on English implicit discourse relations, and few studies address implicit relation recognition in Chinese even though the implicit discourse relations in Chinese are more common than those in English. In our work, both the English and Chinese languages are our focus. The key to implicit relation prediction is to properly model the semantics of the two discourse arguments, as well as the contextual interaction between them. To achieve this goal, we propose a neural network based framework that consists of two hierarchies. The first one is the model hierarchy, in which we propose a max-margin learning method to explore the implicit discourse relation from multiple views. The second one is the feature hierarchy, in which we learn multilevel distributed representations from words, arguments, and syntactic structures to sentences. We have conducted experiments on the standard benchmarks of English and Chinese, and the results show that compared with several methods our proposed method can achieve the best performance in most cases.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "19", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tholpadi:2017:CBT, author = "Goutham Tholpadi and Chiranjib Bhattacharyya and Shirish Shevade", title = "Corpus-Based Translation Induction in {Indian} Languages Using Auxiliary Language Corpora from {Wikipedia}", journal = j-TALLIP, volume = "16", number = "3", pages = "20:1--20:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3038295", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Identifying translations from comparable corpora is a well-known problem with several applications. Existing methods rely on linguistic tools or high-quality corpora. Absence of such resources, especially in Indian languages, makes this problem hard; for example, state-of-the-art techniques achieve a mean reciprocal rank of 0.66 for English--Italian, and a mere 0.187 for Telugu--Kannada. In this work, we address the problem of comparable corpora-based translation correspondence induction (CC-TCI) when the only resources available are small noisy comparable corpora extracted from Wikipedia. We observe that translations in the source and target languages have many topically related words in common in other ``auxiliary'' languages. To model this, we define the notion of a translingual theme, a set of topically related words from auxiliary language corpora, and present a probabilistic framework for CC-TCI. Extensive experiments on 35 comparable corpora showed dramatic improvements in performance. We extend these ideas to propose a method for measuring cross-lingual semantic relatedness (CLSR) between words. To stimulate further research in this area, we make publicly available two new high-quality human-annotated datasets for CLSR.
Experiments on the CLSR datasets show more than 200\% improvement in correlation on the CLSR task. We apply the method to the real-world problem of cross-lingual Wikipedia title suggestion and build the WikiTSu system. A user study on WikiTSu shows a 20\% improvement in the quality of titles suggested.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "20", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhao:2017:HMC, author = "Hai Zhao and Deng Cai and Yang Xin and Yuzhu Wang and Zhongye Jia", title = "A Hybrid Model for {Chinese} Spelling Check", journal = j-TALLIP, volume = "16", number = "3", pages = "21:1--21:??", month = mar, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3047405", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Apr 3 08:15:52 MDT 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Spelling check for Chinese has more challenging difficulties than that for other languages. A hybrid model for Chinese spelling check is presented in this article. The hybrid model consists of three components: one graph-based model for generic errors and two independently trained models for specific errors. In the graph model, a directed acyclic graph is generated for each sentence, and the single-source shortest-path algorithm is performed on the graph to detect and correct general spelling errors at the same time. 
Prior to that, two types of errors over functional words (characters) are first solved by conditional random fields: the confusion of ``[Chinese characters]'' (at) (pinyin is zai in Chinese), ``[Chinese characters]'' (again, more, then) (pinyin: zai) and ``[Chinese characters]'' (of) (pinyin: de), ``[Chinese characters]'' (- ly, adverb-forming particle) (pinyin: de), and ``[Chinese characters]'' (so that, have to) (pinyin: de). Finally, a rule-based model is exploited to distinguish pronoun usage confusion: ``[Chinese characters]'' (she) (pinyin: ta), ``[Chinese characters]'' (he) (pinyin: ta), and some other common collocation errors. The proposed model is evaluated on the standard datasets released by the SIGHAN Bake-off shared tasks, giving state-of-the-art results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "21", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wali:2017:ECL, author = "Wafa Wali and Bilel Gargouri and Adelmajid Ben Hamadou", title = "Evaluating the Content of {LMF} Standardized Dictionaries: a Practical Experiment on {Arabic} Language", journal = j-TALLIP, volume = "16", number = "4", pages = "22:1--22:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3047406", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Since the age of paper versions, dictionaries are often published with anomalies in their content resulting from lexicographer's mistakes or from the lack of efficiency of automatic enrichment systems. Many of these anomalies are expensive to manually detect and difficult to automatically control, notably with lightly structured models of dictionaries. 
In this article, we take advantage of the fine structure proposed by the Lexical Markup Framework (LMF) norm to investigate the detection of anomalies in the content of LMF normalized dictionaries. First, we give a theoretical study on the plausible anomalies, such as inconsistency, incoherence, redundancy, and incompleteness. Second, we detail the approach that we propose for the automatic detection of such anomalies. Finally, we report on an experiment carried out on an available normalized dictionary of the Arabic language. The experiment has shown that the proposed approach gives reasonable results in terms of precision and recall.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "22", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Baly:2017:STM, author = "Ramy Baly and Hazem Hajj and Nizar Habash and Khaled Bashir Shaban and Wassim El-Hajj", title = "A Sentiment {Treebank} and Morphologically Enriched Recursive Deep Models for Effective Sentiment Analysis in {Arabic}", journal = j-TALLIP, volume = "16", number = "4", pages = "23:1--23:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3086576", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Accurate sentiment analysis models encode the sentiment of words and their combinations to predict the overall sentiment of a sentence. This task becomes challenging when applied to morphologically rich languages (MRL). In this article, we evaluate the use of deep learning advances, namely the Recursive Neural Tensor Networks (RNTN), for sentiment analysis in Arabic as a case study of MRLs. 
While Arabic may not be considered the only representative of all MRLs, the challenges faced and proposed solutions in Arabic are common to many other MRLs. We identify, illustrate, and address MRL-related challenges and show how RNTN is affected by the morphological richness and orthographic ambiguity of the Arabic language. To address the challenges with sentiment extraction from text in MRL, we propose to explore different orthographic features as well as different morphological features at multiple levels of abstraction ranging from raw words to roots. A key requirement for RNTN is the availability of a sentiment treebank; a collection of syntactic parse trees annotated for sentiment at all levels of constituency and that currently only exists in English. Therefore, our contribution also includes the creation of the first Arabic Sentiment Treebank (ArSenTB) that is morphologically and orthographically enriched. Experimental results show that, compared to the basic RNTN proposed for English, our solution achieves significant improvements up to 8\% absolute at the phrase level and 10.8\% absolute at the sentence level, measured by average F1 score. It also outperforms well-known classifiers including Support Vector Machines, Recursive Auto Encoders, and Long Short-Term Memory by 7.6\%, 3.2\%, and 1.6\% absolute respectively, all models being trained with similar morphological considerations.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "23", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Punchimudiyanse:2017:AFW, author = "Malinda Punchimudiyanse and Ravinda Gayan Narendra Meegama", title = "Animation of Fingerspelled Words and Number Signs of the {Sinhala} Sign Language", journal = j-TALLIP, volume = "16", number = "4", pages = "24:1--24:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3092743", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Sign language is the primary communication medium of the aurally handicapped community. Often, a sign gesture is mapped to a word or a phrase in a spoken language and named as a conversational sign. A fingerspelling sign is a special sign derived to show a single character that matches a character in the alphabet of a given language. This enables the deaf community to express words that do not have a conversational sign, such as a name, using a letter-by-letter technique. Sinhala Sign Language (SSL) uses a phonetic pronunciation mechanism to decode such words due to the presence of one or more modifiers after a consonant. Expressing numbers also have a similar notation, and it is broken down into parts before interpretation in sign gestures. This article presents the variations implemented to make the 3D avatar-based interpreter system look similar to an actual fingerspelled SSL by a human interpreter. To accomplish the task, a phonetic English-based 3D avatar animation system is developed with Blender animation software. The conversion of Sinhala Unicode text to phonetic English and numbers written in digits to sign gestures is done with a Visual Basic.NET (VB.NET) application. 
The presented application has 61 SSL fingerspelling signs and 40 SSL number signs. It is capable of interpreting any word written using the modern Sinhala alphabet without conversational signs and interprets the numbers that go up to the billions. This is a helpful tool in teaching SSL fingerspelling and number signs of SSL to deaf children.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "24", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Al-Sallab:2017:ARD, author = "Ahmad Al-Sallab and Ramy Baly and Hazem Hajj and Khaled Bashir Shaban and Wassim El-Hajj and Gilbert Badaro", title = "{AROMA}: a Recursive Deep Learning Model for Opinion Mining in {Arabic} as a Low Resource Language", journal = j-TALLIP, volume = "16", number = "4", pages = "25:1--25:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3086575", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "While research on English opinion mining has already achieved significant progress and success, work on Arabic opinion mining is still lagging. This is mainly due to the relative recency of research efforts in developing natural language processing (NLP) methods for Arabic, handling its morphological complexity, and the lack of large-scale opinion resources for Arabic. To close this gap, we examine the class of models used for English and that do not require extensive use of NLP or opinion resources. In particular, we consider the Recursive Auto Encoder (RAE). 
However, RAE models are not as successful in Arabic as they are in English, due to their limitations in handling the morphological complexity of Arabic, providing a more complete and comprehensive input features for the auto encoder, and performing semantic composition following the natural way constituents are combined to express the overall meaning. In this article, we propose A Recursive Deep Learning Model for Opinion Mining in Arabic (AROMA) that addresses these limitations. AROMA was evaluated on three Arabic corpora representing different genres and writing styles. Results show that AROMA achieved significant performance improvements compared to the baseline RAE. It also outperformed several well-known approaches in the literature.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "25", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kong:2017:CSE, author = "Fang Kong and Guodong Zhou", title = "A {CDT}-Styled End-to-End {Chinese} Discourse Parser", journal = j-TALLIP, volume = "16", number = "4", pages = "26:1--26:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3099557", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Discourse parsing is a challenging task and plays a critical role in discourse analysis. Since the release of the Rhetorical Structure Theory Discourse Treebank and the Penn Discourse Treebank, the research on English discourse parsing has attracted increasing attention and achieved considerable success in recent years. At the same time, some preliminary research on certain subtasks about discourse parsing for other languages, such as Chinese, has been conducted. 
In this article, we present an end-to-end Chinese discourse parser with the Connective-Driven Dependency Tree scheme, which consists of multiple components in a pipeline architecture, such as the elementary discourse unit (EDU) detector, discourse relation recognizer, discourse parse tree generator, and attribution labeler. In particular, the attribution labeler determines two attributions (i.e., sense and centering) for every nonterminal node (i.e., discourse relation) in the discourse parse trees. Systematically, our parser detects all EDUs in a free text, generates the discourse parse tree in a bottom-up way, and determines the sense and centering attributions for all nonterminal nodes by traversing the discourse parse tree. Comprehensive evaluation on the Connective-Driven Dependency Treebank corpus from both component-wise and error-cascading perspectives is conducted to illustrate how each component performs in isolation, and how the pipeline performs with error propagation. Finally, it shows that our end-to-end Chinese discourse parser achieves an overall F1 score of 20\% with full automation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "26", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2017:PAL, author = "Shih-Hung Liu and Kuan-Yu Chen and Yu-Lun Hsieh and Berlin Chen and Hsin-Min Wang and Hsu-Chun Yen and Wen-Lian Hsu", title = "A Position-Aware Language Modeling Framework for Extractive Broadcast News Speech Summarization", journal = j-TALLIP, volume = "16", number = "4", pages = "27:1--27:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3099472", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Extractive summarization, a process that automatically picks exemplary sentences from a text (or spoken) document with the goal of concisely conveying key information therein, has seen a surge of attention from scholars and practitioners recently. Using a language modeling (LM) approach for sentence selection has been proven effective for performing unsupervised extractive summarization. However, one of the major difficulties facing the LM approach is to model sentences and estimate their parameters more accurately for each text (or spoken) document. We extend this line of research and make the following contributions in this work. First, we propose a position-aware language modeling framework using various granularities of position-specific information to better estimate the sentence models involved in the summarization process. Second, we explore disparate ways to integrate the positional cues into relevance models through a pseudo-relevance feedback procedure. Third, we extensively evaluate various models originated from our proposed framework and several well-established unsupervised methods. 
Empirical evaluation conducted on a broadcast news summarization task further demonstrates performance merits of the proposed summarization methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "27", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Phani:2017:SLA, author = "Shanta Phani and Shibamouli Lahiri and Arindam Biswas", title = "A Supervised Learning Approach for Authorship Attribution of {Bengali} Literary Texts", journal = j-TALLIP, volume = "16", number = "4", pages = "28:1--28:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3099473", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Authorship Attribution is a long-standing problem in Natural Language Processing. Several statistical and computational methods have been used to find a solution to this problem. In this article, we have proposed methods to deal with the authorship attribution problem in Bengali. More specifically, we proposed a supervised framework consisting of lexical and shallow features and investigated the possibility of using topic-modeling-inspired features, to classify documents according to their authors. We have created a corpus from nearly all the literary works of three eminent Bengali authors, consisting of 3,000 disjoint samples. Our models showed better performance than the state-of-the-art, with more than 98\% test accuracy for the shallow features and 100\% test accuracy for the topic-based features. Further experiments with GloVe vectors [Pennington et al. 2014] showed comparable results, but flexible patterns based on content words and high-frequency words [Schwartz et al. 
2013] failed to perform as well as expected.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "28", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Passban:2017:TLR, author = "Peyman Passban and Qun Liu and Andy Way", title = "Translating Low-Resource Languages by Vocabulary Adaptation from Close Counterparts", journal = j-TALLIP, volume = "16", number = "4", pages = "29:1--29:??", month = sep, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3099556", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Some natural languages belong to the same family or share similar syntactic and/or semantic regularities. This property persuades researchers to share computational models across languages and benefit from high-quality models to boost existing low-performance counterparts. In this article, we follow a similar idea, whereby we develop statistical and neural machine translation (MT) engines that are trained on one language pair but are used to translate another language. First we train a reliable model for a high-resource language, and then we exploit cross-lingual similarities and adapt the model to work for a close language with almost zero resources. We chose Turkish (Tr) and Azeri or Azerbaijani (Az) as the proposed pair in our experiments. Azeri suffers from lack of resources as there is almost no bilingual corpus for this language. Via our techniques, we are able to train an engine for the Az -{$>$} English (En) direction, which is able to outperform all other existing models.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "29", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{S:2017:RMI, author = "Sreelekha S. and Pushpak Bhattacharyya", title = "Role of Morphology Injection in {SMT}: a Case Study from {Indian} Language Perspective", journal = j-TALLIP, volume = "17", number = "1", pages = "1:1--1:??", month = nov, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3129208", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Phrase-based Statistical Machine Translation (PBSMT) is commonly used for automatic translation. However, PBSMT runs into difficulty when either or both of the source and target languages are morphologically rich. Factored models are found to be useful for such cases, as they consider word as a vector of factors. These factors can contain any information about the surface word and use it while translating. The objective of the current work is to handle morphological inflections in Hindi, Marathi, and Malayalam using Factored translation models when translating from English. Statistical MT approaches face the problem of data sparsity when translating to a morphologically rich language. It is very unlikely for a parallel corpus to contain all morphological forms of words. We propose a solution to generate these unseen morphological forms and inject them into the original training corpus. We propose a simple and effective solution based on enriching the input with various morphological forms of words. We observe that morphology injection improves the quality of translation in terms of both adequacy and fluency. We verify this with experiments on three morphologically rich languages when translating from English. 
From the detailed evaluations, we observed an order of magnitude improvement in translation quality.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "1", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Malik:2017:UNE, author = "Muhammad Kamran Malik", title = "{Urdu} Named Entity Recognition and Classification System Using Artificial Neural Network", journal = j-TALLIP, volume = "17", number = "1", pages = "2:1--2:??", month = nov, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3129290", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Named Entity Recognition and Classification (NERC) is a process of identifying words and classifying them into person names, location names, organization names, and so on. In this article, we discuss the development of an Urdu Named Entity (NE) corpus, called the Kamran-PU-NE (KPU-NE) corpus, for three entity types, that is, Person, Organization, and Location, and marking the remaining tokens as Others (O). We use two supervised learning algorithms, Hidden Markov Model (HMM) and Artificial Neural Network (ANN), for the development of the Urdu NERC system. We annotate the 652852-token corpus taken from 15 different genres with a total of 44480 NEs. The inter-annotator agreement between the two annotators in terms of Kappa k statistic is 73.41\%. With HMM, the highest recorded precision, recall, and f-measure values are 55.98\%, 83.11\%, and 66.90\%, respectively, and with ANN, they are 81.05\%, 87.54\%, and 84.17\%, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "2", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kim:2017:PEN, author = "Hyun Kim and Hun-Young Jung and Hongseok Kwon and Jong-Hyeok Lee and Seung-Hoon Na", title = "Predictor--Estimator: Neural Quality Estimation Based on Target Word Prediction for Machine Translation", journal = j-TALLIP, volume = "17", number = "1", pages = "3:1--3:??", month = nov, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3109480", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Recently, quality estimation has been attracting increasing interest from machine translation researchers, aiming at finding a good estimator for the ``quality'' of machine translation output. The common approach for quality estimation is to treat the problem as a supervised regression/classification task using a quality-annotated noisy parallel corpus, called quality estimation data, as training data. However, the available size of quality estimation data remains small, due to the too-expensive cost of creating such data. In addition, most conventional quality estimation approaches rely on manually designed features to model nonlinear relationships between feature vectors and corresponding quality labels. To overcome these problems, this article proposes a novel neural network architecture for quality estimation task---called the predictor--estimator---that considers word prediction as an additional pre-task. The major component of the proposed neural architecture is a word prediction model based on a modified neural machine translation model---a probabilistic model for predicting a target word conditioned on all the other source and target contexts. 
The underlying assumption is that the word prediction model is highly related to quality estimation models and is therefore able to transfer useful knowledge to quality estimation tasks. Our proposed quality estimation method sequentially trains the following two types of neural models: (1) Predictor: a neural word prediction model trained from parallel corpora and (2) Estimator: a neural quality estimation model trained from quality estimation data. To transfer a word prediction task to a quality estimation task, we generate quality estimation feature vectors from the word prediction model and feed them into the quality estimation model. The experimental results on WMT15 and 16 quality estimation datasets show that our proposed method has great potential in the various sub-challenges.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "3", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Almeman:2017:ABV, author = "Khalid Almeman", title = "Automatically Building {VoIP} Speech Parallel Corpora for {Arabic} Dialects", journal = j-TALLIP, volume = "17", number = "1", pages = "4:1--4:??", month = nov, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3132708", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article discusses the process of automatically building Arabic multi-dialect speech corpora using Voice over Internet Protocol (VoIP). The Asterisk framework was adopted to act as the main connection between the parties, for which two virtual machines were created: a sender and a receiver. 
The sender makes a VoIP call to the receiver using the Asterisk framework, while the receiver records the call automatically, a process that is repeated for all the audio files involved in the corpora. In this work, more than 67,000 automatic calls were made between the sender and receiver machines, generating VoIP Arabic corpora for four Arabic dialects. The resulting corpora can be considered the first Arabic VoIP parallel speech corpora and will be made freely available to researchers in Arabic NLP and speech recognition research.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "4", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tran:2017:LRB, author = "Phuoc Tran and Dien Dinh and Tan Le and Long H. B. Nguyen", title = "Linguistic-Relationships-Based Approach for Improving Word Alignment", journal = j-TALLIP, volume = "17", number = "1", pages = "5:1--5:??", month = nov, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3133323", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The unsupervised word alignments (such as GIZA++) are widely used in the phrase-based statistical machine translation. The quality of the model is proportional to the size and the quality of the bilingual corpus. However, for low-resource language pairs such as Chinese and Vietnamese, a result of unsupervised word alignment sometimes is of low quality due to the sparse data. In addition, this model does not take advantage of the linguistic relationships to improve performance of word alignment. Chinese and Vietnamese have the same language type and have close linguistic relationships. 
In this article, we integrate the characteristics of linguistic relationships into the word alignment model to enhance the quality of Chinese-Vietnamese word alignment. These linguistic relationships are Sino-Vietnamese and content word. The experimental results showed that our method improved the performance of word alignment as well as the quality of machine translation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "5", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Cheng:2017:ECC, author = "Xiyao Cheng and Ying Chen and Bixiao Cheng and Shoushan Li and Guodong Zhou", title = "An Emotion Cause Corpus for {Chinese} Microblogs with Multiple-User Structures", journal = j-TALLIP, volume = "17", number = "1", pages = "6:1--6:??", month = nov, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3132684", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "A notably challenging problem in emotion analysis is recognizing the cause of an emotion. Although there have been a few studies on emotion cause detection, most of them work on news reports or a few of them focus on microblogs using a single-user structure (i.e., all texts in a microblog are written by the same user). In this article, we focus on emotion cause detection for Chinese microblogs using a multiple-user structure (i.e., texts in a microblog are successively written by several users). First, based on the fact that the causes of an emotion of a focused user may be provided by other users in a microblog with the multiple-user structure, we design an emotion cause annotation scheme which can deal with such a complicated case, and then provide an emotion cause corpus using the annotation scheme. 
Second, based on the analysis of the emotion cause corpus, we formalize two emotion cause detection tasks for microblogs (current-subtweet-based emotion cause detection and original-subtweet-based emotion cause detection). Furthermore, in order to examine the difficulty of the two emotion cause detection tasks and the contributions of texts written by different users in a microblog with the multiple-user structure, we choose two popular classification methods (SVM and LSTM) to do emotion cause detection. Our experiments show that the current-subtweet-based emotion cause detection is much more difficult than the original-subtweet-based emotion cause detection, and texts written by different users are very helpful for both emotion cause detection tasks. This study presents a pilot study of emotion cause detection which deals with Chinese microblogs using a complicated structure.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "6", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sarma:2017:DAS, author = "Himangshu Sarma and Navanath Saharia and Utpal Sharma", title = "Development and Analysis of Speech Recognition Systems for {Assamese} Language Using {HTK}", journal = j-TALLIP, volume = "17", number = "1", pages = "7:1--7:??", month = nov, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3137055", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Language analysis is very important for the native speaker to connect with the digital world. Assamese is a relatively unexplored language. 
In this report, we analyze different aspects of speech-to-text processing, starting from building a speech corpus, defining syllable rules, and finally developing a speech search engine of Assamese. We have collected about 20 hours of speech in three (viz., read, extempore, and conversation) modes and transcribed it. We also discuss some issues and challenges faced during development of the corpus. We have developed an automatic syllabification model with 11 rules for the Assamese language and found an accuracy of more than 95\% in our result. We found 12 different syllable patterns where 5 are found most frequent. The maximum length of a syllable found is four letters. With the help of Hidden Markov Model Toolkit (HTK) 3.5, we used deep learning based neural network for our speech recognition model, where we obtained 78.05\% accuracy for automatic transcription of Assamese speech.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "7", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bhattacharya:2017:COB, author = "Nilanjana Bhattacharya and Umapada Pal and Partha Pratim Roy", title = "Cleaning of Online {Bangla} Free-form Handwritten Text", journal = j-TALLIP, volume = "17", number = "1", pages = "8:1--8:??", month = nov, year = "2017", CODEN = "????", DOI = "https://doi.org/10.1145/3145538", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Dec 23 10:06:06 MST 2017", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In the normal free-form handwritten text, repetition (repeated writing of the same stroke several times in the same place), over-writing, and crossing out are very common. 
In this article, we call the presence of these three types of writing as ``noise.'' Cleaning to extract useful text from such types of noisy text is an important task for robust recognition. To the best of our knowledge, no work has been reported on cleaning of such noise from online text in any scripts and hence, in this article, we propose an automatic text-cleaning approach for online handwriting recognition. Here, at first, crossing out noise with straight strike-through lines is detected using the straightness criteria of online strokes. Next, regions containing repetition, over-writing, and other types of crossing out are located using the positional information of the overlapping strokes. Stroke density, self-intersections of strokes etc. are computed from the strokes of located regions to predict the type of noise and this type of information is used as follows for their cleaning. For cleaning of crossing outs, all strokes of the crossing-out region are removed. For cleaning repetition and over-writing, strokes written earlier are removed, keeping the latest strokes. Finally, delayed strokes are properly arranged and word is passed to online recognizer. Though recognition of free-form handwriting is quite difficult, in this attempt, we obtained up to 70.71\% improvement in word-recognition accuracy after noise cleaning.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "8", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nasution:2018:GCA, author = "Arbi Haza Nasution and Yohei Murakami and Toru Ishida", title = "A Generalized Constraint Approach to Bilingual Dictionary Induction for Low-Resource Language Families", journal = j-TALLIP, volume = "17", number = "2", pages = "9:1--9:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3138815", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The lack or absence of parallel and comparable corpora makes bilingual lexicon extraction a difficult task for low-resource languages. The pivot language and cognate recognition approaches have been proven useful for inducing bilingual lexicons for such languages. We propose constraint-based bilingual lexicon induction for closely related languages by extending constraints from the recent pivot-based induction technique and further enabling multiple symmetry assumption cycle to reach many more cognates in the transgraph. We further identify cognate synonyms to obtain many-to-many translation pairs. This article utilizes four datasets: one Austronesian low-resource language and three Indo-European high-resource languages. We use three constraint-based methods from our previous work, the Inverse Consultation method and translation pairs generated from Cartesian product of input dictionaries as baselines. We evaluate our result using the metrics of precision, recall, and F-score. Our customizable approach allows the user to conduct cross validation to predict the optimal hyperparameters (cognate threshold and cognate synonym threshold) with various combination of heuristics and number of symmetry assumption cycles to gain the highest F-score. 
Our proposed methods have statistically significant improvement of precision and F-score compared to our previous constraint-based methods. The results show that our method demonstrates the potential to complement other bilingual dictionary creation methods like word alignment models using parallel corpora for high-resource languages while well handling low-resource languages.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "9", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Onyenwe:2018:BLR, author = "Ikechukwu E. Onyenwe and Mark Hepple and Uchechukwu Chinedu and Ignatius Ezeani", title = "A Basic Language Resource Kit Implementation for the {Igbo} {NLP} Project", journal = j-TALLIP, volume = "17", number = "2", pages = "10:1--10:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3146387", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Igbo, an African language with around 32 million speakers worldwide, is one of the many languages having few or none of the language processing resources needed for advanced language technology applications. In this article, we describe the approach taken to creating an initial set of resources for Igbo, including an electronic text corpus, a part-of-speech (POS) tagset, and a POS-tagged subcorpus. We discuss the approach taken in gathering texts, the preprocessing of these texts, and the development of the POS tagged corpus. We also discuss some of the problems encountered during corpus and tagset development and the solutions arrived at for these problems.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "10", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jia:2018:IDP, author = "Yanyan Jia and Yansong Feng and Yuan Ye and Chao Lv and Chongde Shi and Dongyan Zhao", title = "Improved Discourse Parsing with Two-Step Neural Transition-Based Model", journal = j-TALLIP, volume = "17", number = "2", pages = "11:1--11:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3152537", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Discourse parsing aims to identify structures and relationships between different discourse units. Most existing approaches analyze a whole discourse at once, which often fails in distinguishing long-span relations and properly representing discourse units. In this article, we propose a novel parsing model to analyze discourse in a two-step fashion with different feature representations to characterize intra sentence and inter sentence discourse structures, respectively. Our model works in a transition-based framework and benefits from a stack long short-term memory neural network model. Experiments on benchmark tree banks show that our method outperforms traditional 1-step parsing methods in both English and Chinese.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "11", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Naili:2018:CSS, author = "Marwa Naili and Anja Habacha Chaibi and Henda {Hajjami Ben Ghezala}", title = "The Contribution of Stemming and Semantics in {Arabic} Topic Segmentation", journal = j-TALLIP, volume = "17", number = "2", pages = "12:1--12:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3152464", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Topic Segmentation is one of the pillars of Natural Language Processing. Yet there is a remarkable research gap in this field, as far as the Arabic language is concerned. The purpose of this article is to improve Arabic Topic Segmentation (ATS) by inquiring into two segmenters: ArabC99 and ArabTextTiling. This study is carried out on two independent levels: the pre-processing level and the segmentation level. These levels represent the basic steps of topic segmentation. On the pre-processing level, we examine the effect of using different Arabic stemming algorithms on ATS. We find out that Light10 is more appropriate for the pre-processing step. Based on this conclusion, we proceed to the second level by proposing two Arabic segmenters called ArabC99-LS-LSA and ArabTextTiling-LS-LSA. These latter use external semantic knowledge related to the Latent Semantic Analysis (LSA). Based on the evaluation results, we notice that LSA provides improvements in this field. Hence, the main outcome of this article emphasizes the multilevel improvement of ATS based on Light10 and LSA.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "12", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Fujita:2018:EPL, author = "Atsushi Fujita and Pierre Isabelle", title = "Expanding Paraphrase Lexicons by Exploiting Generalities", journal = j-TALLIP, volume = "17", number = "2", pages = "13:1--13:??", month = feb, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3160488", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Techniques for generating and recognizing paraphrases, i.e., semantically equivalent expressions, play an important role in a wide range of natural language processing tasks. In the last decade, the task of automatic acquisition of subsentential paraphrases, i.e., words and phrases with (approximately) the same meaning, has been drawing much attention in the research community. The core problem is to obtain paraphrases of high quality in large quantity. This article presents a method for tackling this issue by systematically expanding an initial seed lexicon made up of high-quality paraphrases. This involves automatically capturing morpho-semantic and syntactic generalizations within the lexicon and using them to leverage the power of large-scale monolingual data. Given an input set of paraphrases, our method starts by inducing paraphrase patterns that constitute generalizations over corresponding pairs of lexical variants, such as ``amending'' and ``amendment,'' in a fully empirical way. It then searches large-scale monolingual data for new paraphrases matching those patterns. The results of our experiments on English, French, and Japanese demonstrate that our method manages to expand seed lexicons by a large multiple. 
Human evaluation based on paraphrase substitution tests reveals that the automatically acquired paraphrases are also of high quality.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "13", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2018:EEW, author = "Shaonan Wang and Jiajun Zhang and Chengqing Zong", title = "Empirical Exploring Word-Character Relationship for {Chinese} Sentence Representation", journal = j-TALLIP, volume = "17", number = "3", pages = "14:1--14:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3156778", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article addresses the problem of learning compositional Chinese sentence representations, which represent the meaning of a sentence by composing the meanings of its constituent words. In contrast to English, a Chinese word is composed of characters, which contain rich semantic information. However, this information has not been fully exploited by existing methods. In this work, we introduce a novel, mixed character-word architecture to improve the Chinese sentence representations by utilizing rich semantic information of inner-word characters. We propose two novel strategies to reach this purpose. The first one is to use a mask gate on characters, learning the relation among characters in a word. The second one is to use a max-pooling operation on words to adaptively find the optimal mixture of the atomic and compositional word representations. Finally, the proposed architecture is applied to various sentence composition models, which achieves substantial performance gains over baseline models on sentence similarity task. 
To further verify the generalization ability of our model, we employ the learned sentence representations as features in sentence classification task, question classification task, and sentence entailment task. Results have shown that the proposed mixed character-word sentence representation models outperform both the character-based and word-based models.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "14", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jia:2018:COR, author = "Shengbin Jia and Shijia E. and Maozhen Li and Yang Xiang", title = "{Chinese} Open Relation Extraction and Knowledge Base Establishment", journal = j-TALLIP, volume = "17", number = "3", pages = "15:1--15:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3162077", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Named entity relation extraction is an important subject in the field of information extraction. Although many English extractors have achieved reasonable performance, an effective system for Chinese relation extraction remains undeveloped due to the lack of Chinese annotation corpora and the specificity of Chinese linguistics. Here, we summarize three kinds of unique but common phenomena in Chinese linguistics. In this article, we investigate unsupervised linguistics-based Chinese open relation extraction (ORE), which can automatically discover arbitrary relations without any manually labeled datasets, and research the establishment of a large-scale corpus. 
By mapping the entity relations into dependency-trees and considering the unique Chinese linguistic characteristics, we propose a novel unsupervised Chinese ORE model based on Dependency Semantic Normal Forms (DSNFs). This model imposes no restrictions on the relative positions among entities and relationships and achieves a high yield by extracting relations mediated by verbs or nouns and processing the parallel clauses. Empirical results from our model demonstrate the effectiveness of this method, which obtains stable performance on four heterogeneous datasets and achieves better precision and recall in comparison with several Chinese ORE systems. Furthermore, a large-scale knowledge base of entity and relation, called COER, is established and published by applying our method to web text, which conquers the trouble of lack of Chinese corpora.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "15", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Marie:2018:PTI, author = "Benjamin Marie and Atsushi Fujita", title = "Phrase Table Induction Using Monolingual Data for Low-Resource Statistical Machine Translation", journal = j-TALLIP, volume = "17", number = "3", pages = "16:1--16:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3168054", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "We propose a new method for inducing a phrase-based translation model from a pair of unrelated monolingual corpora. Our method is able to deal with phrases of arbitrary length and to find phrase pairs that are useful for statistical machine translation, without requiring large parallel or comparable corpora. 
First, our method generates phrase pairs through coupling source and target phrases separately collected from respective monolingual data. Then, for each phrase pair, we compute features using the monolingual data and a small quantity of parallel sentences. Finally, incorrect phrase pairs are pruned, and a phrase table is made using the remaining phrase pairs. In our experiments on French--Japanese and Spanish--Japanese translation tasks under low-resource conditions, we observe that incorporating a phrase table induced by our method to the machine translation system leads to large improvements in translation quality. Furthermore, we show that a phrase table induced by our method can also be useful in a wide range of configurations, including configurations where we have already access to large parallel corpora and configurations where only small monolingual corpora are available.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "16", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Salami:2018:ISS, author = "Shahram Salami and Mehrnoush Shamsfard", title = "Integrating Shallow Syntactic Labels in the Phrase-Boundary Translation Model", journal = j-TALLIP, volume = "17", number = "3", pages = "17:1--17:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3178460", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Using a novel rule labeling method, this article proposes a hierarchical model for statistical machine translation. The proposed model labels translation rules by matching the boundaries of target side phrases with the shallow syntactic labels including POS tags and chunk labels on the target side of the training corpus. 
The boundary labels are concatenated if there is no label for the whole target span. Labeling with the classes of boundary words on the target side phrases has been previously proposed as a phrase-boundary model which can be considered as the base form of our model. In the extended model, the labeler uses a POS tag if there is no chunk label in one boundary. Using chunks as phrase labels, the proposed model generalizes the rules to decrease the model sparseness. The sparseness is a more important issue in the language pairs with a lot of differences in the word order because they have less number of aligned phrase pairs for extraction of rules. The extended phrase-boundary model is also applicable for low-resource languages having no syntactic parser. Some experiments are performed with the proposed model, the base phrase-boundary model, and variants of Syntax Augmented Machine Translation (SAMT) in translation from Persian and German to English as source and target languages with different word orders. According to the results, the proposed model improves the translation performance in the quality and decoding time aspects. Using BLEU as our metric, the proposed model has achieved a statistically significant improvement of about 0.5 point over the base phrase-boundary model.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "17", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sherkawi:2018:ASA, author = "Lina Sherkawi and Nada Ghneim and Oumayma {Al Dakkak}", title = "{Arabic} Speech Act Recognition Techniques", journal = j-TALLIP, volume = "17", number = "3", pages = "18:1--18:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3170576", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article presents rule-based and statistical-based techniques for Arabic speech act recognition. The proposed techniques classify an utterance into Arabic speech act categories based on three criteria: surface features, cue words, and contextual information. A rule-based expert system has been developed in a bootstrapping manner based on the fact that Arabic language syntax is inherently rule-based. Various machine-learning algorithms have been used to detect Arabic speech act categories: Decision Tree, Na{\"\i}ve Bayes, Neural Network, and SVM. We compare the experimental results for both techniques (machine-learning and rule-based expert systems). Using a corpus of 1,500 sentences, the rule-based expert system achieved an accuracy rate of 98.92\%, while the Decision Tree, Na{\"\i}ve Bayes, Neural Network, and SVM achieved an accuracy rate of 97.09\%, 96.48\%, 93.50\%, and 93.70\%, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "18", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jung:2018:EEK, author = "Sangkeun Jung and Changki Lee and Hyunsun Hwang", title = "End-to-End {Korean} Part-of-Speech Tagging Using Copying Mechanism", journal = j-TALLIP, volume = "17", number = "3", pages = "19:1--19:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3178458", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In this article, we introduce a novel neural architecture for the end-to-end Korean Part-of-Speech (POS) tagging problem. To address the problem, we extend the present recurrent neural network-based sequence-to-sequence models to deal with the key challenges in this task: rare word generation and POS tagging. To overcome these issues, Input-Feeding and Copying mechanism are adopted. Although our approach does not require any manual features or preprocessed pattern matching dictionaries, our best single model achieves an F-score of 97.08. This is competitive with the current state-of-the-art model (F-score 98.03), which requires extensive manual feature processing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "19", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sen:2018:AST, author = "Shibaprasad Sen and Ankan Bhattacharyya and Pawan Kumar Singh and Ram Sarkar and Kaushik Roy and David Doermann", title = "Application of Structural and Topological Features to Recognize Online Handwritten {Bangla} Characters", journal = j-TALLIP, volume = "17", number = "3", pages = "20:1--20:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3178457", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article presents a set of novel features for robust online Bangla handwritten character recognition. Two feature extraction methods are presented here. The first describes the transition from background to foreground pixels and vice versa. The second uses a combination of topological features and centre-of-gravity- (CG) based circular features where global information, local information, and Circular Quadrant Mass Distribution information have been extracted. The impact of each along with their combination have also been analyzed. A total of 15,000 isolated online Bangla character samples have been collected and used for the evaluation. A Support Vector Machine classifier records the best recognition rate when the transition count feature, CG-based circular features, and topological features are combined.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "20", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{She:2018:LHD, author = "Xiaohan She and Ping Jian and Pengcheng Zhang and Heyan Huang", title = "Leveraging Hierarchical Deep Semantics to Classify Implicit Discourse Relations via a Mutual Learning Method", journal = j-TALLIP, volume = "17", number = "3", pages = "21:1--21:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3178456", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article presents a mutual learning method using hierarchical deep semantics for the classification of implicit discourse relations in English. With the absence of explicit discourse markers, traditional discourse techniques mainly concentrate on discrete linguistic features in this task, which always leads to a data sparseness problem. To relieve this problem, we propose a mutual learning neural model that makes use of multilevel semantic information together, including the distribution of implicit discourse relations, the semantics of arguments, and the co-occurrence of phrases and words. During the training process, the predicting targets of the model, which are the probability of the discourse relation type and the distributed representation of semantic components, are learned jointly and optimized mutually. The experimental results show that this method outperforms the previous works, especially in multiclass identification attributed to the hierarchical semantic representations and the mutual learning strategy.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "21", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mohamed:2018:MSP, author = "Emad Mohamed", title = "Morphological Segmentation and Part-of-Speech Tagging for the {Arabic} Heritage", journal = j-TALLIP, volume = "17", number = "3", pages = "22:1--22:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3178459", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "We annotate 60,000 words of Classical Arabic (CA) with topics in philosophy, religion, literature, and law with fine-grain segment-based morphological descriptions. We use these annotations for building a morphological segmenter and part-of-speech (POS) tagger for CA. With character-level classification and features from the word and its lexical context, the segmenter achieves a word accuracy of 96.8\% with the main issue being a high rate of out-of-vocabulary words. A token-based POS tagger achieves an accuracy of 96.22\% with 97.72\% on known tokens despite the small size of the corpus. An error analysis shows that most of the tagging errors are results of segmentation and that quality improves with more data being added. The morphological segmenter and tagger have a wide range of potential applications in processing CA, a low-resource variety of the language.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "22", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Huang:2018:IPK, author = "Degen Huang and Jiahuan Pei and Cong Zhang and Kaiyu Huang and Jianjun Ma", title = "Incorporating Prior Knowledge into Word Embedding for {Chinese} Word Similarity Measurement", journal = j-TALLIP, volume = "17", number = "3", pages = "23:1--23:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3182622", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Word embedding-based methods have received increasing attention for their flexibility and effectiveness in many natural language-processing (NLP) tasks, including Word Similarity (WS). However, these approaches rely on high-quality corpus and neglect prior knowledge. Lexicon-based methods concentrate on human's intelligence contained in semantic resources, e.g., Tongyici Cilin, HowNet, and Chinese WordNet, but they have the drawback of being unable to deal with unknown words. This article proposes a three-stage framework for measuring the Chinese word similarity by incorporating prior knowledge obtained from lexicons and statistics into word embedding: in the first stage, we utilize retrieval techniques to crawl the contexts of word pairs from web resources to extend context corpus. In the next stage, we investigate three types of single similarity measurements, including lexicon similarities, statistical similarities, and embedding-based similarities. Finally, we exploit simple combination strategies with math operations and the counter-fitting combination strategy using optimization method. To demonstrate our system's efficiency, comparable experiments are conducted on the PKU-500 dataset. 
Our final results are 0.561/0.516 of Spearman/Pearson rank correlation coefficient, which outperform the state-of-the-art performance to the best of our knowledge. Experiment results on Chinese MC-30 and SemEval-2012 datasets show that our system also performs well on other Chinese datasets, which proves its transferability. Besides, our system is not language-specific and can be applied to other languages, e.g., English.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "23", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ehsani:2018:CWT, author = "Razieh Ehsani and Ercan Solak and Olcay Taner Yildiz", title = "Constructing a {WordNet} for {Turkish} Using Manual and Automatic Annotation", journal = j-TALLIP, volume = "17", number = "3", pages = "24:1--24:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3185664", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In this article, we summarize the methodology and the results of our 2-year-long efforts to construct a comprehensive WordNet for Turkish. In our approach, we mine a dictionary for synonym candidate pairs and manually mark the senses in which the candidates are synonymous. We marked every pair twice by different human annotators. We derive the synsets by finding the connected components of the graph whose edges are synonym senses. We also mined Turkish Wikipedia for hypernym relations among the senses. We analyzed the resulting WordNet to highlight the difficulties brought about by the dictionary construction methods of lexicographers. After splitting the unusually large synsets, we used random walk-based clustering that resulted in a Zipfian distribution of synset sizes. 
We compared our results to BalkaNet and automatic thesaurus construction methods using variation of information metric. Our Turkish WordNet is available online.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "24", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Huang:2018:LRR, author = "Jizhou Huang and Shiqiang Ding and Haifeng Wang and Ting Liu", title = "Learning to Recommend Related Entities With Serendipity for {Web} Search Users", journal = j-TALLIP, volume = "17", number = "3", pages = "25:1--25:??", month = may, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3185663", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Entity recommendation, providing entity suggestions to assist users in discovering interesting information, has become an indispensable feature of today's Web search engine. However, the majority of existing entity recommendation methods are not designed to boost the performance in terms of serendipity, which also plays an important role in the appreciation of users for a recommendation system. To keep users engaged, it is important to take into account serendipity when building an entity recommendation system. In this article, we propose a learning to recommend framework that consists of two components: related entity finding and candidate entity ranking. To boost serendipity performance, three different sets of features that correlate with the three aspects of serendipity are employed in the proposed framework. Extensive experiments are conducted on large-scale, real-world datasets collected from a widely used commercial Web search engine. 
The experiments show that our method significantly outperforms several strong baseline methods. An analysis on the impact of features reveals that the set of interestingness features is the most powerful feature set, and the set of unexpectedness features can significantly contribute to recommendation effectiveness. In addition, online controlled experiments conducted on a commercial Web search engine demonstrate that our method can significantly improve user engagement against multiple baseline methods. This further confirms the effectiveness of the proposed framework.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "25", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Basiri:2018:WII, author = "Mohammad Ehsan Basiri and Arman Kabiri", title = "Words Are Important: Improving Sentiment Analysis in the {Persian} Language by Lexicon Refining", journal = j-TALLIP, volume = "17", number = "4", pages = "26:1--26:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3195633", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Lexicon-based sentiment analysis (SA) aims to address the problem of extracting people's opinions from their comments on the Web using a predefined lexicon of opinionated words. In contrast to the machine learning (ML) approach, lexicon-based methods are domain-independent methods that do not need a large annotated training corpus and hence are faster. This makes the lexicon-based approach prevalent in the SA community. However, the story is different for the Persian language. In contrast to English, using the lexicon-based method in Persian is a new discipline. 
There are rather limited resources available for SA in Persian, making the accuracy of the existing lexicon-based methods lower than other languages. In the current study, first an exhaustive investigation of the lexicon-based method is performed. Then two new resources are introduced to address the problem of resource scarcity for SA in Persian: a carefully labeled lexicon of sentiment words, PerLex, and a new handmade dataset of about 16,000 rated documents, PerView. Moreover, a new hybrid method using both ML and the lexicon-based approach is presented in which PerLex words are used to train the ML algorithm. Experiments are carried out on our new PerView dataset. Results indicate that the accuracy of PerLex is higher than the existing CNRC, Adjectives, SentiStrength, PerSent, and LexiPers lexicons. In addition, the results show that using PerLex significantly decreases the execution time of the proposed system in comparison to the above-mentioned lexicons. Moreover, the results demonstrate the excellence of using opinionated lexicon terms followed by bigrams as the features employed in the ML method.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "26", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Suryani:2018:RBS, author = "Arie Ardiyanti Suryani and Dwi Hendratmo Widyantoro and Ayu Purwarianti and Yayat Sudaryat", title = "The Rule-Based {Sundanese} Stemmer", journal = j-TALLIP, volume = "17", number = "4", pages = "27:1--27:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3195634", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Our research proposed an iterative Sundanese stemmer by removing the derivational affixes prior to the inflexional. This scheme was chosen because, in the Sundanese affixation, a confix (one of derivational affix) is applied in the last phase of a morphological process. Moreover, most of Sundanese affixes are derivational, so removing the derivational affix as the first step is reasonable. To handle ambiguity, the last recognized affix was returned as the result. As the baseline, a Confix-Stripping Approach that applies Porter Stemmer for the Indonesian language was used. This stemmer shares similarities in terms of affix type, but uses a different stemming order. To observe whether the baseline stems the Sundanese affixed word properly, some features that were not covered by the baseline, such as the infix and allomorph removal, were added. The evaluation was done using 4,453 unique affixed words collected from Sundanese online magazines. The experiment shows that, as a whole, our stemmer outperforms the modified baseline in terms of recognized affixed type accuracy and properly stemmed affixed words. 
Our stemmer recognized 68.87\% of the Sundanese affixed types and produced 96.79\% of the correctly affixed words; the modified baseline resulted in 21.70\% and 71.59\%, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "27", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{He:2018:DPS, author = "Ruifang He and Yaru Wang and Dawei Song and Peng Zhang and Yuan Jia and Aijun Li", title = "A Dependency Parser for Spontaneous {Chinese} Spoken Language", journal = j-TALLIP, volume = "17", number = "4", pages = "28:1--28:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3196278", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Dependency analysis is vital for spoken language understanding in spoken dialogue systems. However, existing research has mainly focused on western spoken languages, Japanese, and so on. Little research has been done for spoken Chinese in terms of dependency parsing. Therefore, the new spoken corpus, D-ESCSC (Dependency-Expressive Speech Corpus of Standard Chinese) is built by adding new dependency relations special to spoken Chinese based on a written Chinese annotation scheme. Since spoken Chinese contains typical ill-grammatical phenomena, e.g., translocation, repetition, duplication, and omission, the new atom feature related to punctuation and three feature templates are proposed to improve a graph-based dependency parser. Experimental results on spoken Chinese corpus show that the atom feature and three templates really work and the new parser outperforms the baseline parser. 
To our best knowledge, it is the first work to report dependency parsing results of spoken Chinese.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "28", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bai:2018:IVS, author = "Xuefeng Bai and Hailong Cao and Tiejun Zhao", title = "Improving Vector Space Word Representations Via Kernel Canonical Correlation Analysis", journal = j-TALLIP, volume = "17", number = "4", pages = "29:1--29:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3197566", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Cross-lingual word embeddings are representations for vocabularies of two or more languages in one common continuous vector space and are widely used in various natural language processing tasks. A state-of-the-art way to generate cross-lingual word embeddings is to learn a linear mapping, with an assumption that the vector representations of similar words in different languages are related by a linear relationship. However, this assumption does not always hold true, especially for substantially different languages. We therefore propose to use kernel canonical correlation analysis to capture a non-linear relationship between word embeddings of two languages. 
By extensively evaluating the learned word embeddings on three tasks (word similarity, cross-lingual dictionary induction, and cross-lingual document classification) across five language pairs, we demonstrate that our proposed approach achieves essentially better performances than previous linear methods on all of the three tasks, especially for language pairs with substantial typological difference.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "29", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Park:2018:NCI, author = "Taekeun Park and Seung-Hoon Kim", title = "Novel Character Identification Utilizing Semantic Relation with Animate Nouns in {Korean}", journal = j-TALLIP, volume = "17", number = "4", pages = "30:1--30:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3197657", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "For identifying speakers of quoted speech or extracting social networks from literature, it is indispensable to extract character names and nominals. However, detecting proper nouns in the novels translated into or written in Korean is harder than in English because Korean does not have a capitalization feature. In addition, it is almost impossible for any proper noun dictionary to include all kinds of character names that have been created or will be created by authors. Fortunately, a previous study shows that utilizing postpositions for animate nouns is a simple and effective tool for character identification in Korean novels without a proper noun dictionary and a training corpus. In this article, we propose a character identification method utilizing the semantic relation with known animate nouns. 
For 80 novels in Korean, the proposed method increases the micro- and macro-average recall by 13.68\% and 11.86\%, respectively, while decreasing the micro-average precision by 0.28\% and increasing the macro-average precision by 0.07\% compared to the previous study. If we focus on characters that are responsible for more than 1\% of the character name mentions in each novel, the micro- and macro-average F-measure of the proposed method are 96.98\% and 97.32\%, respectively.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "30", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2018:GBB, author = "Rui Wang and Hai Zhao and Sabine Ploux and Bao-Liang Lu and Masao Utiyama and Eiichiro Sumita", title = "Graph-Based Bilingual Word Embedding for Statistical Machine Translation", journal = j-TALLIP, volume = "17", number = "4", pages = "31:1--31:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3203078", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Bilingual word embedding has been shown to be helpful for Statistical Machine Translation (SMT). However, most existing methods suffer from two obvious drawbacks. First, they only focus on simple contexts such as an entire document or a fixed-sized sliding window to build word embedding and ignore latent useful information from the selected context. Second, the word sense but not the word should be the minimal semantic unit; however, most existing methods still use word representation. To overcome these drawbacks, this article presents a novel Graph-Based Bilingual Word Embedding (GBWE) method that projects bilingual word senses into a multidimensional semantic space. 
First, a bilingual word co-occurrence graph is constructed using the co-occurrence and pointwise mutual information between the words. Then, maximum complete subgraphs (cliques), which play the role of a minimal unit for bilingual sense representation, are dynamically extracted according to the contextual information. Consequently, correspondence analysis, principal component analyses, and neural networks are used to summarize the clique-word matrix into lower dimensions to build the embedding model. Without contextual information, the proposed GBWE can be applied to lexical translation. In addition, given contextual information, GBWE is able to give a dynamic solution for bilingual word representations, which can be applied to phrase translation and generation. Empirical results show that GBWE can enhance the performance of lexical translation, as well as Chinese/French-to-English and Chinese-to-Japanese phrase-based SMT tasks (IWSLT, NTCIR, NIST, and WAT).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "31", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Hamdi:2018:CCS, author = "Ali Hamdi and Khaled Shaban and Anazida Zainal", title = "{CLASENTI}: a Class-Specific Sentiment Analysis Framework", journal = j-TALLIP, volume = "17", number = "4", pages = "32:1--32:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3209885", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Arabic text sentiment analysis suffers from low accuracy due to Arabic-specific challenges (e.g., limited resources, morphological complexity, and dialects) and general linguistic issues (e.g., fuzziness, implicit sentiment, sarcasm, and spam). 
The limited resources problem requires efforts to build new and improved Arabic corpora and lexica. We propose a class-specific sentiment analysis (CLASENTI) framework. The framework includes a new annotation approach to build multi-faceted Arabic corpus and lexicon allowing for simultaneous annotation of different facets, including domains, dialects, linguistic issues, and polarity strengths. Each of these facets has multiple classes (e.g., the nine classes representing dialects found in the Arab world). The new corpus and lexicon annotations facilitate the development of new class-specific classification models and polarity strength calculation. For the new sentiment classification models, we propose a hybrid model combining corpus-based and lexicon-based models. The corpus-based model has two interrelated phases to build; (1) full-corpus classification models for all facets; and (2) class-specific models trained on filtered subsets of the corpus according to the performances of the full-corpus models. To calculate polarity strengths, the lexicon-based model filters the annotated lexicon based on the specific classes of the domain and dialect. As a case study, we collect and annotate 15274 reviews from various sources, including surveys, Facebook comments, and Twitter posts, pertaining to governmental services. In addition, we develop a new web-based application to apply the proposed framework on the case study. CLASENTI framework reaches up to 95\% accuracy and 93\% F1-Score surpassing the best-known sentiment classifiers implemented in Scikit-learn library that achieve 82\% accuracy and 81\% F1-Score for Arabic when tested on the same dataset.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "32", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2018:DSN, author = "Limin Wang and Shoushan Li and Qian Yan and Guodong Zhou", title = "Domain-specific Named Entity Recognition with Document-Level Optimization", journal = j-TALLIP, volume = "17", number = "4", pages = "33:1--33:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3213544", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Previous studies normally formulate named entity recognition (NER) as a sequence labeling task and optimize the solution in the sentence level. In this article, we propose a document-level optimization approach to NER and apply it in a domain-specific document-level NER task. As a baseline, we apply a state-of-the-art approach, i.e., long-short-term memory (LSTM), to perform word classification. On this basis, we define a global objective function with the obtained word classification results and achieve global optimization via Integer Linear Programming (ILP). Specifically, in the ILP-based approach, we propose four kinds of constraints, i.e., label transition, entity length, label consistency, and domain-specific regulation constraints, to incorporate various entity recognition knowledge in the document level. Empirical studies demonstrate the effectiveness of the proposed approach to domain-specific document-level NER.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "33", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Komiya:2018:CMA, author = "Kanako Komiya and Masaya Suzuki and Tomoya Iwakura and Minoru Sasaki and Hiroyuki Shinnou", title = "Comparison of Methods to Annotate Named Entity Corpora", journal = j-TALLIP, volume = "17", number = "4", pages = "34:1--34:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3218820", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The authors compared two methods for annotating a corpus for the named entity (NE) recognition task using non-expert annotators: (i) revising the results of an existing NE recognizer and (ii) manually annotating the NEs completely. The annotation time, degree of agreement, and performance were evaluated based on the gold standard. Because there were two annotators for one text for each method, two performances were evaluated: the average performance of both annotators and the performance when at least one annotator is correct. The experiments reveal that semi-automatic annotation is faster, achieves better agreement, and performs better on average. However, they also indicate that sometimes, fully manual annotation should be used for some texts whose document types are substantially different from the training data document types. In addition, the machine learning experiments using semi-automatic and fully manually annotated corpora as training data indicate that the F-measures could be better for some texts when manual instead of semi-automatic annotation was used. 
Finally, experiments using the annotated corpora for training as additional corpora show that (i) the NE recognition performance does not always correspond to the performance of the NE tag annotation and (ii) the system trained with the manually annotated corpus outperforms the system trained with the semi-automatically annotated corpus with respect to newswires, even though the existing NE recognizer was mainly trained with newswires.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "34", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhou:2018:WSP, author = "Deyu Zhou and Zhikai Zhang and Min-Ling Zhang and Yulan He", title = "Weakly Supervised {POS} Tagging without Disambiguation", journal = j-TALLIP, volume = "17", number = "4", pages = "35:1--35:??", month = aug, year = "2018", CODEN = "????", DOI = "https://doi.org/10.1145/3214707", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:31 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Weakly supervised part-of-speech (POS) tagging is to learn to predict the POS tag for a given word in context by making use of partial annotated data instead of the fully tagged corpora. Weakly supervised POS tagging would benefit various natural language processing applications in such languages where tagged corpora are mostly unavailable. In this article, we propose a novel framework for weakly supervised POS tagging based on a dictionary of words with their possible POS tags. In the constrained error-correcting output codes (ECOC)-based approach, a unique L-bit vector is assigned to each POS tag. The set of bitvectors is referred to as a coding matrix with value $\{1, -1\}$. Each column of the coding matrix specifies a dichotomy over the tag space to learn a binary classifier. 
For each binary classifier, its training data is generated in the following way: each pair of words and its possible POS tags are considered as a positive training example only if the whole set of its possible tags falls into the positive dichotomy specified by the column coding and similarly for negative training examples. Given a word in context, its POS tag is predicted by concatenating the predictive outputs of the L binary classifiers and choosing the tag with the closest distance according to some measure. By incorporating the ECOC strategy, the set of all possible tags for each word is treated as an entirety without the need of performing disambiguation. Moreover, instead of manual feature engineering employed in most previous POS tagging approaches, features for training and testing in the proposed framework are automatically generated using neural language modeling. The proposed framework has been evaluated on three corpora for English, Italian, and Malagasy POS tagging, achieving accuracies of 93.21\%, 90.9\%, and 84.5\% individually, which shows a significant improvement compared to the state-of-the-art approaches.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "35", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bhattacharya:2019:UCW, author = "Paheli Bhattacharya and Pawan Goyal and Sudeshna Sarkar", title = "Using Communities of Words Derived from Multilingual Word Vectors for Cross-Language Information Retrieval in {Indian} Languages", journal = j-TALLIP, volume = "18", number = "1", pages = "1:1--1:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3208358", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3208358", abstract = "We investigate the use of word embeddings for query translation to improve precision in cross-language information retrieval (CLIR). Word vectors represent words in a distributional space such that syntactically or semantically similar words are close to each other in this space. Multilingual word embeddings are constructed in such a way that similar words across languages have similar vector representations. We explore the effective use of bilingual and multilingual word embeddings learned from comparable corpora of Indic languages to the task of CLIR. We propose a clustering method based on the multilingual word vectors to group similar words across languages. For this we construct a graph with words from multiple languages as nodes and with edges connecting words with similar vectors. We use the Louvain method for community detection to find communities in this graph. We show that choosing target language words as query translations from the clusters or communities containing the query terms helps in improving CLIR. 
We also find that better-quality query translations are obtained when words from more languages are used to do the clustering even when the additional languages are neither the source nor the target languages. This is probably because having more similar words across multiple languages helps define well-defined dense subclusters that help us obtain precise query translations. In this article, we demonstrate the use of multilingual word embeddings and word clusters for CLIR involving Indic languages. We also make available a tool for obtaining related words and the visualizations of the multilingual word vectors for English, Hindi, Bengali, Marathi, Gujarati, and Tamil.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "1", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2019:OAE, author = "Maoxi Li and Mingwen Wang", title = "Optimizing Automatic Evaluation of Machine Translation with the {ListMLE} Approach", journal = j-TALLIP, volume = "18", number = "1", pages = "2:1--2:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3226045", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3226045", abstract = "Automatic evaluation of machine translation is critical for the evaluation and development of machine translation systems. In this study, we propose a new model for automatic evaluation of machine translation. The proposed model combines standard n-gram precision features and sentence semantic mapping features with neural features, including neural language model probabilities and the embedding distances between translation outputs and their reference translations. 
We optimize the model with a representative list-wise learning to rank approach, ListMLE, in terms of human ranking assessments. The experimental results on WMT'2015 Metrics task indicated that the proposed approach yields significantly better correlations with human assessments than several state-of-the-art baseline approaches. In particular, the results confirmed that the proposed list-wise learning to rank approach is useful and powerful for optimizing automatic evaluation metrics in terms of human ranking assessments. Deep analysis also demonstrated that optimizing automatic metrics with the ListMLE approach is a reasonable method and adding the neural features can gain considerable improvements compared with the traditional features.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "2", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Su:2019:RSA, author = "Ming-Hsiang Su and Chung-Hsien Wu and Kun-Yi Huang and Wu-Hsuan Lin", title = "Response Selection and Automatic Message-Response Expansion in Retrieval-Based {QA} Systems using Semantic Dependency Pair Model", journal = j-TALLIP, volume = "18", number = "1", pages = "3:1--3:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3229184", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3229184", abstract = "This article presents an approach to response selection and message-response (MR) database expansion from the unstructured data on the psychological consultation websites for a retrieval-based question answering (QA) system in a constrained domain for emotional support and comforting. 
First, we manually construct an initial MR database based on the articles collected from the psychological consultation websites. The Chinese Knowledge and Information Processing probabilistic context-free grammar is adopted to obtain the semantic dependency graphs (SDGs) of all the messages and responses in the initial MR database. For each sentence in the MR database, all the semantic dependencies, each composed of two words and their semantic relation, are extracted from the SDG of the sentence to form a semantic dependency set. Finally, a matrix with the element representing the correlation between the semantic dependencies of the messages and their corresponding responses is constructed as a semantic dependency pair model (SDPM) for response selection. Moreover, as the number of MR pairs in the psychological consultation websites is increasing day by day, the MR database in the QA system should be expanded to meet the needs of the users. For MR database expansion, the unstructured data from the message board are automatically collected. For the collected data, the supervised latent Dirichlet allocation is adopted for event detection and then the event-based delta Bayesian Information Criterion is used for message and response article segmentation. Each extracted message segment is then fed to the constructed retrieval-based QA system to find the best matched response segment and the matching score is also estimated to verify if the new MR pair is suitable to be included in the expanded MR database. Fivefold cross validation was employed to evaluate the performance of the proposed retrieval-based QA system over the expanded MR database based on SDPM. Compared to the vector space model-based method, the Okapi BM25 model, and the deep learning-based sequence-to-sequence with attention model, the proposed approach achieved a more favorable performance according to a statistical significance test. 
The retrieval accuracy based on MR expansion was also evaluated and a satisfactory result was obtained confirming the effectiveness of the expanded MR database. In addition, the user's satisfaction score of the proposed system was evaluated using the Cronbach's alpha value and the satisfaction score of the proposed SDPM was higher than those of the methods for comparison.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "3", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Huang:2019:IMH, author = "Guoping Huang and Jiajun Zhang and Yu Zhou and Chengqing Zong", title = "Input Method for Human Translators: a Novel Approach to Integrate Machine Translation Effectively and Imperceptibly", journal = j-TALLIP, volume = "18", number = "1", pages = "4:1--4:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3230638", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3230638", abstract = "Computer-aided translation (CAT) systems are the most popular tool for helping human translators efficiently perform language translation. To further improve the translation efficiency, there is an increasing interest in applying machine translation (MT) technology to upgrade CAT. To thoroughly integrate MT into CAT systems, in this article, we propose a novel approach: a new input method that makes full use of the knowledge adopted by MT systems, such as translation rules, decoding hypotheses, and n-best translation lists. 
The proposed input method contains two parts: a phrase generation model, allowing human translators to type target sentences quickly, and an n-gram prediction model, helping users choose perfect MT fragments smoothly. In addition, to tune the underlying MT system to generate the input method preferable results, we design a new evaluation metric for the MT system. The proposed input method integrates MT effectively and imperceptibly, and it is particularly suitable for many target languages with complex characters, such as Chinese and Japanese. The extensive experiments demonstrate that our method saves more than 23\% in time and over 42\% in keystrokes, and it also improves the translation quality by more than 5 absolute BLEU scores compared with the strong baseline, i.e., post-editing using Google Pinyin.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "4", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Altakrori:2019:AAA, author = "Malik H. Altakrori and Farkhund Iqbal and Benjamin C. M. Fung and Steven H. H. Ding and Abdallah Tubaishat", title = "{Arabic} Authorship Attribution: an Extensive Study on {Twitter} Posts", journal = j-TALLIP, volume = "18", number = "1", pages = "5:1--5:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3236391", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3236391", abstract = "Law enforcement faces problems in tracing the true identity of offenders in cybercrime investigations. Most offenders mask their true identity, impersonate people of high authority, or use identity deception and obfuscation tactics to avoid detection and traceability. 
To address the problem of anonymity, authorship analysis is used to identify individuals by their writing styles without knowing their actual identities. Most authorship studies are dedicated to English due to its widespread use over the Internet, but recent cyber-attacks such as the distribution of Stuxnet indicate that Internet crimes are not limited to a certain community, language, culture, ideology, or ethnicity. To effectively investigate cybercrime and to address the problem of anonymity in online communication, there is a pressing need to study authorship analysis of languages such as Arabic, Chinese, Turkish, and so on. Arabic, the focus of this study, is the fourth most widely used language on the Internet. This study investigates authorship of Arabic discourse/text, especially tiny text, Twitter posts. We benchmark the performance of a profile-based approach that uses n-grams as features and compare it with state-of-the-art instance-based classification techniques. Then we adapt an event-visualization tool that is developed for English to accommodate both Arabic and English languages and visualize the result of the attribution evidence. In addition, we investigate the relative effect of the training set, the length of tweets, and the number of authors on authorship classification accuracy. Finally, we show that diacritics have an insignificant effect on the attribution process and part-of-speech tags are less effective than character-level and word-level n-grams.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "5", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2019:WSB, author = "Shaoning Zhang and Cunli Mao and Zhengtao Yu and Hongbin Wang and Zhongwei Li and Jiafu Zhang", title = "Word Segmentation for {Burmese} Based on Dual-Layer {CRFs}", journal = j-TALLIP, volume = "18", number = "1", pages = "6:1--6:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3232537", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3232537", abstract = "Burmese is an isolated language, in which the syllable is the smallest unit. Syllable segmentation methods based on matching lead to performance subject to the syllable segmentation effect. This article proposes a word segmentation method with fusion conditions of double syllable features. It combines word segmentation and segmentation of syllables into one process, thus reducing the impact of errors on the syllable segmentation of Burmese. In the first layer of the conditional random fields (CRF) model, Burmese characters as atomic features are integrated into the Burma section of the Barkis Speech Paradigm (Backus normal form) features to realize the Burma syllable sequence tags. In the second layer of the CRFs model, with the syllable marked as input, it realizes the sequence markers through building a feature template with syllables as atomic features. The experimental results show that the proposed method has a better effect compared with the method based on the matching of syllables.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "6", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2019:IML, author = "Junjie Li and Haoran Li and Xiaomian Kang and Haitong Yang and Chengqing Zong", title = "Incorporating Multi-Level User Preference into Document-Level Sentiment Classification", journal = j-TALLIP, volume = "18", number = "1", pages = "7:1--7:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3234512", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3234512", abstract = "Document-level sentiment classification aims to predict a user's sentiment polarity in a document about a product. Most existing methods only focus on review contents and ignore users who post reviews. In fact, when reviewing a product, different users have different word-using habits to express opinions (i.e., word-level user preference), care about different attributes of the product (i.e., aspect-level user preference), and have different characteristics to score the review (i.e., polarity-level user preference). These preferences have great influence on interpreting the sentiment of text. To address this issue, we propose a model called Hierarchical User Attention Network (HUAN), which incorporates multi-level user preference into a hierarchical neural network to perform document-level sentiment classification. Specifically, HUAN encodes different kinds of information (word, sentence, aspect, and document) in a hierarchical structure and imports user embedding and user attention mechanism to model these preferences. Empirical results on two real-world datasets show that HUAN achieves state-of-the-art performance. 
Furthermore, HUAN can also mine important attributes of products for different users.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "7", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jain:2019:UES, author = "Amita Jain and Minni Jain and Goonjan Jain and Devendra K. Tayal", title = "{``UTTAM''}: an Efficient Spelling Correction System for {Hindi} Language Based on Supervised Learning", journal = j-TALLIP, volume = "18", number = "1", pages = "8:1--8:??", month = jan, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3264620", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3264620", abstract = "In this article, we propose a system called ``UTTAM,'' for correcting spelling errors in Hindi language text using supervised learning. Unlike other languages, Hindi contains a large set of characters, words with inflections and complex characters, phonetically similar sets of characters, and so on. The complexity increases the possibility of confusion and occasionally leads to entering a wrong character in a word. The existence of spelling errors in text significantly decreases the accuracy of the available resources, like search engine, text editor, and so on. The proposed work is the first approach to correct non-word (Out of Vocabulary) errors as well as real-word errors simultaneously in a sentence of Hindi language. The proposed method investigates the human behavior, i.e., the type and frequency of spelling errors done by humans in Hindi text. Based on the type and frequency of spelling errors, the heterogeneous data is collected in matrices. 
This data in matrices is used to generate the suitable candidate words for an input word. After generating candidate words, the Viterbi algorithm is applied to perform the word correction. The Viterbi algorithm finds the best sequence of candidate words to correct the input sentence. For Hindi, this work is the first attempt for real-word error correction. For non-word errors, the experiments show that ``UTTAM'' performs better than the existing systems SpellGuru and Saksham.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "8", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Murthy:2019:INT, author = "Rudra Murthy and Mitesh M. Khapra and Pushpak Bhattacharyya", title = "Improving {NER} Tagging Performance in Low-Resource Languages via Multilingual Learning", journal = j-TALLIP, volume = "18", number = "2", pages = "9:1--9:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3238797", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3238797", abstract = "Existing supervised solutions for Named Entity Recognition (NER) typically rely on a large annotated corpus. Collecting large amounts of NER annotated corpus is time-consuming and requires considerable human effort. However, collecting small amounts of annotated corpus for any language is feasible, but the performance degrades due to data sparsity. We address the data sparsity by borrowing features from the data of a closely related language. We use hierarchical neural networks to train a supervised NER system. The feature borrowing from a closely related language happens via the shared layers of the network. 
The neural network is trained on the combined dataset of the low-resource language and a closely related language, also termed Multilingual Learning. Unlike existing systems, we share all layers of the network between the two languages. We apply multilingual learning for NER in Indian languages and empirically show the benefits over a monolingual deep learning system and a traditional machine-learning system with some feature engineering. Using multilingual learning, we show that the low-resource language NER performance increases mainly due to (1) increased named entity vocabulary, (2) cross-lingual subword features, and (3) multilingual learning playing the role of regularization.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "9", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jarrar:2019:DBM, author = "Mustafa Jarrar and Fadi Zaraket and Rami Asia and Hamzeh Amayreh", title = "Diacritic-Based Matching of {Arabic} Words", journal = j-TALLIP, volume = "18", number = "2", pages = "10:1--10:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3242177", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3242177", abstract = "Words in Arabic consist of letters and short vowel symbols called diacritics inscribed atop regular letters. Changing diacritics may change the syntax and semantics of a word; turning it into another. This results in difficulties when comparing words based solely on string matching. Typically, Arabic NLP applications resort to morphological analysis to battle ambiguity originating from this and other challenges. 
In this article, we introduce three alternative algorithms to compare two words with possibly different diacritics. We propose the Subsume knowledge-based algorithm, the Imply rule-based algorithm, and the Alike machine-learning-based algorithm. We evaluated the soundness, completeness, and accuracy of the algorithms against a large dataset of 86,886 word pairs. Our evaluation shows that the accuracy of Subsume (100\%), Imply (99.32\%), and Alike (99.53\%). Although accurate, Subsume was able to judge only 75\% of the data. Both Subsume and Imply are sound, while Alike is not. We demonstrate the utility of the algorithms using a real-life use case --- in lemma disambiguation and in linking hundreds of Arabic dictionaries.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "10", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bhattacharya:2019:SSW, author = "Nilanjana Bhattacharya and Partha Pratim Roy and Umapada Pal", title = "Sub-Stroke-Wise Relative Feature for Online {Indic} Handwriting Recognition", journal = j-TALLIP, volume = "18", number = "2", pages = "11:1--11:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3264735", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3264735", abstract = "The main problem of Bangla (Bengali) and Devanagari handwriting recognition is the shape similarity of characters. There are only a few pieces of work on writer-independent cursive online Indian text recognition, and the shape similarity problem needs more attention from the researchers. 
To handle the shape similarity problem of cursive characters of Bangla and Devanagari scripts, in this article, we propose a new category of features called `sub-stroke-wise relative feature' (SRF) which are based on relative information of the constituent parts of the handwritten strokes. Relative information among some of the parts within a character can be a distinctive feature as it scales up small dissimilarities and enhances discrimination among similar-looking shapes. Also, contextual anticipatory phenomena are automatically modeled by this type of feature, as it takes into account the influence of previous and forthcoming strokes. We have tested popular state-of-the-art feature sets as well as proposed SRF using various (up to 20,000-word) lexicons and noticed that SRF significantly outperforms the state-of-the-art feature sets for online Bangla and Devanagari cursive word recognition.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "11", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mrinalini:2019:PBP, author = "K. Mrinalini and T. Nagarajan and P. Vijayalakshmi", title = "Pause-Based Phrase Extraction and Effective {OOV} Handling for Low-Resource Machine Translation Systems", journal = j-TALLIP, volume = "18", number = "2", pages = "12:1--12:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3265751", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3265751", abstract = "Machine translation is the core problem for several natural language processing research across the globe. 
However, building a translation system involving low-resource languages remains a challenge with respect to statistical machine translation (SMT). This work proposes and studies the effect of a phrase-induced hybrid machine translation system for translation from English to Tamil, under a low-resource setting. Unlike conventional hybrid MT systems, the free-word ordering feature of the target language Tamil is exploited to form a re-ordered target language model and to extend the parallel text corpus for training the SMT. In the current work, a novel rule-based phrase-extraction method, implemented using parts-of-speech (POS) and place-of-pause in both languages is proposed, which is used to pre-process the training corpus for developing the back-off phrase-induced SMT. Further, out-of-vocabulary (OOV) words are handled using speech-based transliteration and two-level thesaurus intersection techniques based on the POS tag of the OOV word. To ensure that the input with OOV words does not skip phrase-level translation in the hierarchical model, a phrase-level example-based machine translation approach is adopted to find the closest matching phrase and perform translation followed by OOV replacement. The proposed system results in a bilingual evaluation understudy score of 84.78 and a translation edit rate of 19.12. The performance of the system is compared in terms of adequacy and fluency, with existing translation systems for this specific language pair, and it is observed that the proposed system outperforms its counterparts.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "12", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Le:2019:LRM, author = "Ngoc Tan Le and Fatiha Sadat and Lucie Menard and Dien Dinh", title = "Low-Resource Machine Transliteration Using Recurrent Neural Networks", journal = j-TALLIP, volume = "18", number = "2", pages = "13:1--13:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3265752", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3265752", abstract = "Grapheme-to-phoneme models are key components in automatic speech recognition and text-to-speech systems. With low-resource language pairs that do not have available and well-developed pronunciation lexicons, grapheme-to-phoneme models are particularly useful. These models are based on initial alignments between grapheme source and phoneme target sequences. Inspired by sequence-to-sequence recurrent neural network--based translation methods, the current research presents an approach that applies an alignment representation for input sequences and pretrained source and target embeddings to overcome the transliteration problem for a low-resource languages pair. Evaluation and experiments involving French and Vietnamese showed that with only a small bilingual pronunciation dictionary available for training the transliteration models, promising results were obtained with a large increase in BLEU scores and a reduction in Translation Error Rate (TER) and Phoneme Error Rate (PER). Moreover, we compared our proposed neural network--based transliteration approach with a statistical one.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "13", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Na:2019:TBK, author = "Seung-hoon Na and Jianri Li and Jong-hoon Shin and Kangil Kim", title = "Transition-Based {Korean} Dependency Parsing Using Hybrid Word Representations of Syllables and Morphemes with {LSTMs}", journal = j-TALLIP, volume = "18", number = "2", pages = "14:1--14:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3241745", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3241745", abstract = "Recently, neural approaches for transition-based dependency parsing have become one of the state-of-the-art methods for performing dependency parsing tasks in many languages. In neural transition-based parsing, a parser state representation is first computed from the configuration of a stack and a buffer, which is then fed into a feed-forward neural network model that predicts the next transition action. Given that words are basic elements of a stack and buffer, a parser state representation is considerably affected by how a word representation is defined. In particular, word representation issues become more critical in morphologically rich languages such as Korean, as the set of potential words is not bound but introduces the second-order vocabulary complexity, called the phrase vocabulary complexity due to the agglutinative characteristics of the language. In this article, we propose a hybrid word representation that combines two compositional word representations, each of which is derived from representations of syllables and morphemes, respectively.
Our underlying assumption for this hybrid word representation is that, because both syllables and morphemes are two common ways of decomposing Korean words, it is expected that their effects in inducing word representation are complementary to one another. Experimental results carried on Sejong and SPMRL 2014 datasets show that our proposed hybrid word representation leads to the state-of-the-art performance.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "14", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Akhtar:2019:IWE, author = "Md Shad Akhtar and Palaash Sawant and Sukanta Sen and Asif Ekbal and Pushpak Bhattacharyya", title = "Improving Word Embedding Coverage in Less-Resourced Languages Through Multi-Linguality and Cross-Linguality: a Case Study with Aspect-Based Sentiment Analysis", journal = j-TALLIP, volume = "18", number = "2", pages = "15:1--15:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3273931", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3273931", abstract = "In the era of deep learning-based systems, efficient input representation is one of the primary requisites in solving various problems related to Natural Language Processing (NLP), data mining, text mining, and the like. Absence of adequate representation for an input introduces the problem of data sparsity, and it poses a great challenge to solve the underlying problem. The problem is more intensified with resource-poor languages due to the absence of a sufficiently large corpus required to train a word embedding model. 
In this work, we propose an effective method to improve the word embedding coverage in less-resourced languages by leveraging bilingual word embeddings learned from different corpora. We train and evaluate deep Long Short Term Memory (LSTM)-based architecture and show the effectiveness of the proposed approach for two aspect-level sentiment analysis tasks (i.e., aspect term extraction and sentiment classification). The neural network architecture is further assisted by hand-crafted features for prediction. We apply the proposed model in two experimental setups: multi-lingual and cross-lingual. Experimental results show the effectiveness of the proposed approach against the state-of-the-art methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "15", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nakamura:2019:WBR, author = "Tatsuya Nakamura and Masumi Shirakawa and Takahiro Hara and Shojiro Nishio", title = "{Wikipedia}-Based Relatedness Measurements for Multilingual Short Text Clustering", journal = j-TALLIP, volume = "18", number = "2", pages = "16:1--16:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3276473", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3276473", abstract = "Throughout the world, people can post information about their local area in their own languages using social networking services. Multilingual short text clustering is an important task to organize such information, and it can be applied to various applications, such as event detection and summarization. However, measuring the relatedness between short texts written in various languages is a challenging problem. 
In addition to handling multiple languages, the semantic gaps among all languages must be considered. In this article, we propose two Wikipedia-based semantic relatedness measurement methods for multilingual short text clustering. The proposed methods solve the semantic gap problem by incorporating the inter-language links of Wikipedia into Extended Naive Bayes (ENB), a probabilistic method that can be applied to measure semantic relatedness among monolingual short texts. The proposed methods represent a multilingual short text as a vector of the English version of Wikipedia articles (entities). By transferring texts to a unified vector space, the relatedness between texts in different languages with similar meanings can be increased. We also propose an approach that can improve clustering performance and reduce the processing time by eliminating language-specific entities in the unified vector space. Experimental results on multilingual Twitter message clustering revealed that the proposed methods outperformed cross-lingual explicit semantic analysis, a previously proposed method to measure relatedness between texts in different languages. Moreover, the proposed methods were comparable to ENB applied to texts translated into English using a proprietary translation service. The proposed methods enabled relatedness measurements for multilingual short text clustering without requiring machine translation processes.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "16", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ding:2019:NFF, author = "Chenchen Ding and Masao Utiyama and Eiichiro Sumita", title = "{NOVA}: a Feasible and Flexible Annotation System for Joint Tokenization and Part-of-Speech Tagging", journal = j-TALLIP, volume = "18", number = "2", pages = "17:1--17:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3276773", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3276773", abstract = "A feasible and flexible annotation system is designed for joint tokenization and part-of-speech (POS) tagging to annotate those languages without natural definitions of words. This design was motivated by the fact that word separators are not used in many highly analytic East and Southeast Asian languages. Although several of the languages are well-studied, e.g., Chinese and Japanese, many are understudied with low resources, e.g., Burmese (Myanmar) and Khmer. In the first part of the article, the proposed annotation system, named nova, is introduced. nova contains only four basic tags (n, v, a, and o); these tags can be further modified and combined to adapt complex linguistic phenomena in tokenization and POS tagging. In the second part of the article, the feasibility and flexibility of nova is illustrated from the annotation practice on Burmese and Khmer. The relation between nova and two universal POS tagsets is discussed in the final part of the article.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "17", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ahmadi:2019:RBK, author = "Sina Ahmadi", title = "A Rule-Based {Kurdish} Text Transliteration System", journal = j-TALLIP, volume = "18", number = "2", pages = "18:1--18:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3278623", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3278623", abstract = "In this article, we present a rule-based approach for transliterating two of the most used orthographies in Sorani Kurdish. Our work consists of detecting a character in a word by removing the possible ambiguities and mapping it into the target orthography. We describe different challenges in Kurdish text mining and propose novel ideas concerning the transliteration task for Sorani Kurdish. Our transliteration system, named Wergor, achieves 82.79\% overall precision and more than 99\% in detecting the double-usage characters. We also present a manually transliterated corpus for Kurdish.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "18", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kamila:2019:THL, author = "Sabyasachi Kamila and Mohammad Hasanuzzaman and Asif Ekbal and Pushpak Bhattacharyya", title = "{Tempo-HindiWordNet}: a Lexical Knowledge-base for Temporal Information Processing", journal = j-TALLIP, volume = "18", number = "2", pages = "19:1--19:??", month = feb, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3277504", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3277504", abstract = "Temporality has significantly contributed to various Natural Language Processing and Information Retrieval applications. In this article, we first create a lexical knowledge-base in Hindi by identifying the temporal orientation of word senses based on their definition and then use this resource to detect underlying temporal orientation of the sentences. To create the resource, we propose a semi-supervised learning framework, where each synset of the Hindi WordNet is classified into one of the five categories, namely, past, present, future, neutral, and atemporal. The algorithm initiates learning with a set of seed synsets and then iterates following different expansion strategies, viz. probabilistic expansion based on classifier's confidence and semantic distance based measures. We manifest the usefulness of the resource that we build on an external task, viz. sentence-level temporal classification. The underlying idea is that a temporal knowledge-base can help in classifying the sentences according to their inherent temporal properties. Experiments on two different domains, viz. general and Twitter, show interesting results.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Asian Low-Resour. Lang. Inf. Process.", articleno = "19", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Alnawas:2019:SAI, author = "Anwar Alnawas and Nursal Arici", title = "Sentiment Analysis of {Iraqi Arabic} Dialect on {Facebook} Based on Distributed Representations of Documents", journal = j-TALLIP, volume = "18", number = "3", pages = "20:1--20:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3278605", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3278605", abstract = "Nowadays, social media is used by many people to express their opinions about a variety of topics. Opinion Mining or Sentiment Analysis techniques extract opinions from user generated contents. Over the years, a multitude of Sentiment Analysis studies has been done about the English language with deficiencies of research in all other languages. Unfortunately, Arabic is one of the languages that seems to lack substantial research, despite the rapid growth of its use on social media outlets. Furthermore, specific Arabic dialects should be studied, not just Modern Standard Arabic. In this paper, we experiment sentiments analysis of Iraqi Arabic dialect using word embedding. First, we made a large corpus from previous works to learn word representations. Second, we generated word embedding model by training corpus using Doc2Vec representations based on Paragraph and Distributed Memory Model of Paragraph Vectors (DM-PV) architecture. Lastly, the represented feature used for training four binary classifiers (Logistic Regression, Decision Tree, Support Vector Machine and Naive Bayes) to detect sentiment. 
We also experimented different values of parameters (window size, dimension and negative samples). In the light of the experiments, it can be concluded that our approach achieves a better performance for Logistic Regression and Support Vector Machine than the other classifiers.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "20", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Singh:2019:OHG, author = "Sukhdeep Singh and Anuj Sharma", title = "Online Handwritten {Gurmukhi} Words Recognition: an Inclusive Study", journal = j-TALLIP, volume = "18", number = "3", pages = "21:1--21:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3282441", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3282441", abstract = "Identification of offline and online handwritten words is a challenging and complex task. In comparison to Latin and Oriental scripts, the research and study of handwriting recognition at word level in Indic scripts is at its initial phases. The two main methods of handwriting recognition are global and analytical. The present work introduces a novel analytical approach for online handwritten Gurmukhi word recognition based on a minimal set of words and recognizes an input Gurmukhi word as a sequence of characters. We employed a sequential step-by-step approach to recognize online handwritten Gurmukhi words. Considering the massive variability in online Gurmukhi handwriting, the present work employs the completely linked non-homogeneous hidden Markov model. In the present study, we considered the dependent, major-dependent, and super-dependent nature of strokes to form Gurmukhi characters in words. 
On test sets of online handwritten Gurmukhi datasets, the word-level accuracy rates are 85.98\%, 84.80\%, 82.40\%, and 82.20\% in four different modes. Besides the online Gurmukhi word recognition, the present work also provides Gurmukhi handwriting analysis study for varying writing styles and proposes novel techniques for zone detection and rearrangement of strokes. Our proposed algorithms have been successfully employed to online handwritten Gurmukhi word recognition in dependent and independent modes of handwriting.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "21", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yucesoy:2019:COW, author = "Veysel Y{\"u}cesoy and Aykut Ko{\c{c}}", title = "Co-occurrence Weight Selection in Generation of Word Embeddings for Low Resource Languages", journal = j-TALLIP, volume = "18", number = "3", pages = "22:1--22:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3282443", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3282443", abstract = "This study aims to increase the performance of word embeddings by proposing a new weighting scheme for co-occurrence counting. The idea behind this new family of weights is to overcome the disadvantage of distant appearing word pairs, which are indeed semantically close, while representing them in the co-occurrence counting. For high-resource languages, this disadvantage might not be effective due to the high frequency of co-occurrence. However, when there are not enough available resources, such pairs suffer from being distant. 
To favour such pairs, a weighting scheme based on a polynomial fitting procedure is proposed to shift the weights up for distant words while the weights of nearby words are left almost unchanged. The parameter optimization for new weights and the effects of the weighting scheme are analysed for the English, Italian, and Turkish languages. A small portion of English resources and a quarter of Italian resources are utilized for demonstration purposes, as if these languages are low-resource languages. Performance increase is observed in analogy tests when the proposed weighting scheme is applied to relatively small corpora (i.e., mimicking low-resource languages) of both English and Italian. To show the effectiveness of the proposed scheme in small corpora, it is also shown for a large English corpus that the performance of the proposed weighting scheme cannot outperform the original weights. Since Turkish is relatively a low-resource language, it is demonstrated that the proposed weighting scheme can increase the performance of both analogy and similarity tests when all Turkish Wikipedia pages are utilized as a corpus. The positive effect of the proposed scheme has also been demonstrated in a standard sentiment analysis task for the Turkish language.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "22", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bounhas:2019:UCA, author = "Ibrahim Bounhas", title = "On the Usage of a Classical {Arabic} Corpus as a Language Resource: Related Research and Key Challenges", journal = j-TALLIP, volume = "18", number = "3", pages = "23:1--23:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3277591", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3277591", abstract = "This article presents a literature review of computer-science-related research applied on hadith, a kind of Arabic narration which appeared in the 7th century. We study and compare existent works in several fields of Natural Language Processing (NLP), Information Retrieval (IR), and Knowledge Extraction (KE). Thus, we elicit their main drawbacks and identify some perspectives, which may be considered by the research community. We also study the characteristics of these types of documents, by enumerating the advantages/limits of using hadith as a language resource. Moreover, our study shows that previous studies used different collections of hadiths, thus making it hard to compare their results objectively. Besides, many preprocessing steps are recurrent through these applications, thus wasting a lot of time. Consequently, the key issues for building generic language resources from hadiths are discussed, taking into account the relevance of related literature and the wide community of researchers that are interested in these narrations.
The ultimate goal is to structure hadith books for multiple usages, thus building common collections which may be exploited in future applications.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "23", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jung:2019:MPN, author = "Sangkeun Jung and Cheon-Eum Park and Changki Lee", title = "Multitask Pointer Network for {Korean} Dependency Parsing", journal = j-TALLIP, volume = "18", number = "3", pages = "24:1--24:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3282442", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3282442", abstract = "Dependency parsing is a fundamental problem in natural language processing. We introduce a novel dependency-parsing framework called head-pointing--based dependency parsing. In this framework, we cast the Korean dependency parsing problem as a statistical head-pointing and arc-labeling problem. To address this problem, a novel neural network called the multitask pointer network is devised for a neural sequential head-pointing and type-labeling architecture. Our approach does not require any handcrafted features or language-specific rules to parse dependency. Furthermore, it achieves state-of-the-art performance for Korean dependency parsing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "24", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bolucu:2019:UJP, author = "Necva B{\"o}l{\"u}c{\"u} and Burcu Can", title = "Unsupervised Joint {PoS} Tagging and Stemming for Agglutinative Languages", journal = j-TALLIP, volume = "18", number = "3", pages = "25:1--25:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3292398", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3292398", abstract = "The number of possible word forms is theoretically infinite in agglutinative languages. This brings up the out-of-vocabulary (OOV) issue for part-of-speech (PoS) tagging in agglutinative languages. Since inflectional morphology does not change the PoS tag of a word, we propose to learn stems along with PoS tags simultaneously. Therefore, we aim to overcome the sparsity problem by reducing word forms into their stems. We adopt a Bayesian model that is fully unsupervised. We build a Hidden Markov Model for PoS tagging where the stems are emitted through hidden states. Several versions of the model are introduced in order to observe the effects of different dependencies throughout the corpus, such as the dependency between stems and PoS tags or between PoS tags and affixes. Additionally, we use neural word embeddings to estimate the semantic similarity between the word form and stem. We use the semantic similarity as prior information to discover the actual stem of a word since inflection does not change the meaning of a word. We compare our models with other unsupervised stemming and PoS tagging models on Turkish, Hungarian, Finnish, Basque, and English. 
The results show that a joint model for PoS tagging and stemming improves on an independent PoS tagger and stemmer in agglutinative languages.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "25", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kang:2019:SDR, author = "Xiaomian Kang and Chengqing Zong and Nianwen Xue", title = "A Survey of Discourse Representations for {Chinese} Discourse Annotation", journal = j-TALLIP, volume = "18", number = "3", pages = "26:1--26:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3293442", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3293442", abstract = "A key element in computational discourse analysis is the design of a formal representation for the discourse structure of a text. With machine learning being the dominant method, it is important to identify a discourse representation that can be used to perform large-scale annotation. This survey provides a systematic analysis of existing discourse representation theories to evaluate whether they are suitable for annotation of Chinese text. Specifically, the two properties, expressiveness and practicality, are introduced to compare the representations of theories based on rhetorical relations and the representations of theories based on entity relations. The comparison systematically reveals linguistic and computational characteristics of the theories. After that, we conclude that none of the existing theories are quite suitable for scalable Chinese discourse annotation because they are not both expressive and practical. 
Therefore, a new discourse representation needs to be proposed, which should balance the expressiveness and practicality, and cover rhetorical relations and entity relations. Inspired by the conclusions, this survey discusses some preliminary proposals on how to represent the discourse structure that are worth pursuing.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "26", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Badaro:2019:SOM, author = "Gilbert Badaro and Ramy Baly and Hazem Hajj and Wassim El-Hajj and Khaled Bashir Shaban and Nizar Habash and Ahmad Al-Sallab and Ali Hamdi", title = "A Survey of Opinion Mining in {Arabic}: a Comprehensive System Perspective Covering Challenges and Advances in Tools, Resources, Models, Applications, and Visualizations", journal = j-TALLIP, volume = "18", number = "3", pages = "27:1--27:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3295662", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3295662", abstract = "Opinion-mining or sentiment analysis continues to gain interest in industry and academics. While there has been significant progress in developing models for sentiment analysis, the field remains an active area of research for many languages across the world, and in particular for the Arabic language, which is the fifth most-spoken language and has become the fourth most-used language on the Internet. With the flurry of research activity in Arabic opinion mining, several researchers have provided surveys to capture advances in the field. 
While these surveys capture a wealth of important progress in the field, the fast pace of advances in machine learning and natural language processing (NLP) necessitates a continuous need for a more up-to-date literature survey. The aim of this article is to provide a comprehensive literature survey for state-of-the-art advances in Arabic opinion mining. The survey goes beyond surveying previous works that were primarily focused on classification models. Instead, this article provides a comprehensive system perspective by covering advances in different aspects of an opinion-mining system, including advances in NLP software tools, lexical sentiment and corpora resources, classification models, and applications of opinion mining. It also presents future directions for opinion mining in Arabic. The survey also covers latest advances in the field, including deep learning advances in Arabic Opinion Mining. The article provides state-of-the-art information to help new or established researchers in the field as well as industry developers who aim to deploy an operational complete opinion-mining system. Key insights are captured at the end of each section for particular aspects of the opinion-mining system giving the reader a choice of focusing on particular aspects of interest.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "27", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Masmoudi:2019:ADR, author = "Abir Masmoudi and Salima Mdhaffar and Rahma Sellami and Lamia Hadrich Belguith", title = "Automatic Diacritics Restoration for {Tunisian} Dialect", journal = j-TALLIP, volume = "18", number = "3", pages = "28:1--28:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3297278", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3297278", abstract = "Modern Standard Arabic, as well as Arabic dialect languages, are usually written without diacritics. The absence of these marks constitutes a real problem in the automatic processing of these data by NLP tools. Indeed, writing Arabic without diacritics introduces several types of ambiguity. First, a word without diacritics could have many possible meanings depending on its diacritization. Second, undiacritized surface forms of an Arabic word might have as many as 200 readings depending on the complexity of its morphology [12]. In fact, the agglutination property of Arabic might produce a problem that can only be resolved using diacritics. Third, without diacritics a word could have many possible parts of speech (POS) instead of one. This is the case with the words that have the same spelling and POS tag but a different lexical sense, or words that have the same spelling but different POS tags and lexical senses [8]. Finally, there is ambiguity at the grammatical level (syntactic ambiguity). In this article, we propose the first work that investigates the automatic diacritization of Tunisian Dialect texts. We first describe our annotation guidelines and procedure.
Then, we propose two major models, namely a statistical machine translation (SMT) and a discriminative model as a sequence classification task based on Conditional Random Fields (CRF). In the second approach, we integrate POS features to influence the generation of diacritics. Diacritics restoration was performed at both the word and the character levels. The results showed high scores of automatic diacritization based on the CRF system (Word Error Rate (WER) 21.44\% for CRF and WER 34.6\% for SMT).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "28", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Rudra:2019:IAD, author = "Koustav Rudra and Ashish Sharma and Kalika Bali and Monojit Choudhury and Niloy Ganguly", title = "Identifying and Analyzing Different Aspects of {English--Hindi} Code-Switching in {Twitter}", journal = j-TALLIP, volume = "18", number = "3", pages = "29:1--29:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314935", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314935", abstract = "Code-switching or the juxtaposition of linguistic units from two or more languages in a single utterance, has, in recent times, become very common in text, thanks to social media and other computer mediated forms of communication. In this exploratory study of English-Hindi code-switching on Twitter, we automatically create a large corpus of code-switched tweets and devise techniques to identify the relationship between successive components in a code-switched tweet. 
More specifically, we identify pragmatic functions such as narrative-evaluative, negative reinforcement, translation or semantically equivalent statements, and so on characterizing the relation between successive components. We analyze the difference/similarity between switching patterns in code-switched and monolingual multi-component tweets. We observe strong dominance of narrative-evaluative (non-opinion to opinion or vice versa) switching in case of both code-switched and monolingual multi-component tweets in around 40\% of cases. Polarity switching appears to be a prevalent switching phenomenon (10\%) specifically in code-switched tweets (three to four times higher than monolingual multi-component tweets) where preference of expressing negative sentiment in Hindi is approximately twice compared to English. Positive reinforcement appears to be an important pragmatic function for English multi-component tweets, whereas negative reinforcement plays a key role for Devanagari multi-component tweets. Our results also indicate that the extent and nature of code-switching also strongly depend on the topic (sports, politics, etc.) of discussion.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "29", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Verma:2019:CAH, author = "Pradeepika Verma and Sukomal Pal and Hari Om", title = "A Comparative Analysis on {Hindi} and {English} Extractive Text Summarization", journal = j-TALLIP, volume = "18", number = "3", pages = "30:1--30:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3308754", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/python.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3308754", abstract = "Text summarization is the process of transfiguring a large documental information into a clear and concise form. In this article, we present a detailed comparative study of various extractive methods for automatic text summarization on Hindi and English text datasets of news articles. We consider 13 different summarization techniques, namely, TextRank, LexRank, Luhn, LSA, Edmundson, ChunkRank, TGraph, UniRank, NN-ED, NN-SE, FE-SE, SummaRuNNer, and MMR-SE, and we evaluate their performance using various performance metrics, such as precision, recall, F$_1$, cohesion, non-redundancy, readability, and significance. A thorough analysis is done in eight different parts that exhibits the strengths and limitations of these methods, effect of performance over the summary length, impact of language of a document, and other factors as well. A standard summary evaluation tool (ROUGE) and extensive programmatic evaluation using Python 3.5 in Anaconda environment are used to evaluate their outcome.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "30", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wei:2019:ROD, author = "Bingzhen Wei and Xuancheng Ren and Yi Zhang and Xiaoyan Cai and Qi Su and Xu Sun", title = "Regularizing Output Distribution of Abstractive {Chinese} Social Media Text Summarization for Improved Semantic Consistency", journal = j-TALLIP, volume = "18", number = "3", pages = "31:1--31:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314934", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314934", abstract = "Abstractive text summarization is a highly difficult problem, and the sequence-to-sequence model has shown success in improving the performance on the task. However, the generated summaries are often inconsistent with the source content in semantics. In such cases, when generating summaries, the model selects semantically unrelated words with respect to the source content as the most probable output. The problem can be attributed to heuristically constructed training data, where summaries can be unrelated to the source content, thus containing semantically unrelated words and spurious word correspondence. In this article, we propose a regularization approach for the sequence-to-sequence model and make use of what the model has learned to regularize the learning objective to alleviate the effect of the problem. In addition, we propose a practical human evaluation method to address the problem that the existing automatic evaluation method does not evaluate the semantic consistency with the source content properly. Experimental results demonstrate the effectiveness of the proposed approach, which outperforms almost all the existing models. 
Especially, the proposed approach improves the semantic consistency by 4\% in terms of human evaluation.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "31", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Trieu:2019:LAR, author = "Hai-Long Trieu and Duc-Vu Tran and Ashwin Ittoo and Le-Minh Nguyen", title = "Leveraging Additional Resources for Improving Statistical Machine Translation on {Asian} Low-Resource Languages", journal = j-TALLIP, volume = "18", number = "3", pages = "32:1--32:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314936", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314936", abstract = "Phrase-based machine translation (MT) systems require large bilingual corpora for training. Nevertheless, such large bilingual corpora are unavailable for most language pairs in the world, causing a bottleneck for the development of MT. For the Asian language pairs---Japanese, Indonesian, Malay paired with Vietnamese---they are also not excluded from the case, in which there are no large bilingual corpora on these low-resource language pairs. Furthermore, although the languages are widely used in the world, there is no prior work on MT, which causes an issue for the development of MT on these languages. In this article, we conducted an empirical study of leveraging additional resources to improve MT for the Asian low-resource language pairs: translation from Japanese, Indonesian, and Malay to Vietnamese.
We propose an innovative approach that lies in two strategies of building bilingual corpora from comparable data and phrase pivot translation on existing bilingual corpora of the languages paired with English. Bilingual corpora were built from Wikipedia bilingual titles to enhance bilingual data for the low-resource languages. Additionally, we introduced a combined model of the additional resources to create an effective solution to improve MT on the Asian low-resource languages. Experimental results show the effectiveness of our systems with the improvement of +2 to +7 BLEU points. This work contributes to the development of MT on low-resource languages, especially opening a promising direction for the progress of MT on the Asian language pairs.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "32", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Dehghan:2019:CDS, author = "Mohammad Hossein Dehghan and Heshaam Faili", title = "Converting Dependency Structure Into {Persian} Phrase Structure", journal = j-TALLIP, volume = "18", number = "3", pages = "33:1--33:??", month = jul, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314937", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:32 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314937", abstract = "Treebank is one of the important and useful resources in natural language processing represented in two different annotated schemas: phrase and dependency structures. There are many works that convert a phrase structure into a dependency structure and vice versa. Most of them are based that exploit the handcrafted head percolation table and argument table in predefined deterministic ways. 
In this article, we propose a method to convert a dependency structure into a phrase structure by enriching a trainable model of former hybrid strategy approach. By adding a classifier to the algorithm and using postprocessing modification, the quality of conversion is increased. We evaluate our method in two different languages, English and Persian, and then analyze the errors. The results of our experiments show a 46.01\% reduction of error rate in English and 76.50\% for Persian compared to our baseline. We build a new phrase structure treebank by converting 10,000 sentences of Persian dependency treebank into corresponding phrase structures and correcting them manually.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "33", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Awais:2019:RDI, author = "Muhammad Awais and Muhammad Shoaib", title = "Role of Discourse Information in {Urdu} Sentiment Classification: a Rule-based Method and Machine-learning Technique", journal = j-TALLIP, volume = "18", number = "4", pages = "34:1--34:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3300050", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3300050", abstract = "In computational linguistics, sentiment analysis refers to the classification of opinions in a positive class or a negative class. There exist a lot of different methods for sentiment analysis of the English language, but the literature lacks the availability of methods and techniques for Urdu, which is the largely spoken language in the South Asian sub-continent and the national language of Pakistan. 
The currently available techniques, such as adjective count method known as Bag of Words (BoW), is not sufficient for classification of complex sentiment written in the Urdu language. Also, the performance of available machine-learning techniques (with legacy features), for classification of Urdu sentiments, are not comparable with the achieved accuracy of other languages. In the case of the English language, the discourse information (sub-sentence-level information) boosts the performance of both the BoW method and machine-learning techniques, but there are very few works available that have tested the context-level information for the sentiment analysis of the Urdu language. This research aims to extract the discourse information from the Urdu sentiments and utilise the discourse information to improve the performance and reduce the error rate of existing techniques for Urdu Sentiment classification. The proposed solution extracts the discourse information, suggests a new set of features for machine-learning techniques, and introduces a set of rules to extend the capabilities of the BoW model. The results show that the task has been enhanced significantly and the performance metrics such as recall, precision, and accuracy are increased by 31.25\%, 8.46\%, and 21.6\%, respectively. In future, the proposed technique can be extended to sentiments with more than two sub-opinions, such as for blogs, reviews, and TV talk shows.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "34", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nongmeikapam:2019:HMM, author = "Kishorjit Nongmeikapam and Kanan Wahengbam and Oinam Nickson Meetei and Themrichon Tuithung", title = "Handwritten {Manipuri Meetei--Mayek} Classification Using Convolutional Neural Network", journal = j-TALLIP, volume = "18", number = "4", pages = "35:1--35:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3309497", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3309497", abstract = "A new technique for classifying all 56 different characters of the Manipuri Meetei-Mayek (MMM) is proposed herein. The characters are grouped under five categories, which are Eeyek Eepee (original alphabets), Lom Eeyek (additional letters), Cheising Eeyek (digits), Lonsum Eeyek (letters with short endings), and Cheitap Eeyek (vowel signs). Two related works proposed by previous researchers are studied for understanding the benefits claimed by the proposed deep learning approach in handwritten Manipuri Meetei-Mayek. (1) Histogram of Oriented Gradients (HOG) with SVM classifier is implemented for thoroughly understanding how HOG features can influence accuracy. (2) The handwritten samples are trained using simple Convolutional Neural Network (CNN) and compared with the proposed CNN-based architecture. Significant progress has been made in the field of Optical Character Recognition (OCR) for well-known Indian languages as well as globally popular languages. Our work is novel in the sense that there is no record of work available to date that is able to classify all 56 classes of the MMM.
It will also serve as a pre-cursor for developing end-to-end OCR software for translating old manuscripts, newspaper archives, books, and so on.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "35", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Gao:2019:SBC, author = "Shengxiang Gao and Jihao Huang and Mingya Xue and Zhengtao Yu and Zhuo Wang and Yang Zhang", title = "Syntax-Based {Chinese--Vietnamese} Tree-to-Tree Statistical Machine Translation with Bilingual Features", journal = j-TALLIP, volume = "18", number = "4", pages = "36:1--36:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314938", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314938", abstract = "Because of the scarcity of bilingual corpora, current Chinese--Vietnamese machine translation is far from satisfactory. Considering the differences between Chinese and Vietnamese, we investigate whether linguistic differences can be used to supervise machine translation and propose a method of syntax-based Chinese--Vietnamese tree-to-tree statistical machine translation with bilingual features. Analyzing the syntax differences between Chinese and Vietnamese, we define some linguistic difference-based rules, such as attributive position, time adverbial position, and locative adverbial position, and create rewards for similar rules. These rewards are integrated into the extraction of tree-to-tree translation rules, and we optimize the pruning of the search space during the decoding phase. The experiments on Chinese--Vietnamese bilingual sentence translation show that the proposed method performs better than several compared methods. 
Further, the results show that syntactic difference features, with search pruning, can improve the accuracy of machine translation without degrading the efficiency.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "36", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sun:2019:NSP, author = "Ruiyong Sun and Yijia Zhao and Qi Zhang and Keyu Ding and Shijin Wang and Cui Wei", title = "A Neural Semantic Parser for Math Problems Incorporating Multi-Sentence Information", journal = j-TALLIP, volume = "18", number = "4", pages = "37:1--37:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314939", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314939", abstract = "In this article, we study the problem of parsing a math problem into logical forms. It is an essential pre-processing step for automatically solving math problems. Most of the existing studies about semantic parsing mainly focused on the single-sentence level. However, for parsing math problems, we need to take the information of multiple sentences into consideration. To achieve the task, we formulate the task as a machine translation problem and extend the sequence-to-sequence model with a novel two-encoder architecture and a word-level selective mechanism. For training and evaluating the proposed method, we construct a large-scale dataset. Experimental results show that the proposed two-encoder architecture and word-level selective mechanism could bring significant improvement. The proposed method can achieve better performance than the state-of-the-art methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "37", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Maimaiti:2019:MRT, author = "Mieradilijiang Maimaiti and Yang Liu and Huanbo Luan and Maosong Sun", title = "Multi-Round Transfer Learning for Low-Resource {NMT} Using Multiple High-Resource Languages", journal = j-TALLIP, volume = "18", number = "4", pages = "38:1--38:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314945", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314945", abstract = "Neural machine translation (NMT) has made remarkable progress in recent years, but the performance of NMT suffers from a data sparsity problem since large-scale parallel corpora are only readily available for high-resource languages (HRLs). In recent days, transfer learning (TL) has been used widely in low-resource languages (LRLs) machine translation, while TL is becoming one of the vital directions for addressing the data sparsity problem in low-resource NMT. As a solution, a transfer learning method in NMT is generally obtained via initializing the low-resource model (child) with the high-resource model (parent). However, leveraging the original TL to low-resource models is neither able to make full use of highly related multiple HRLs nor to receive different parameters from the same parents. In order to exploit multiple HRLs effectively, we present a language-independent and straightforward multi-round transfer learning (MRTL) approach to low-resource NMT. 
Besides, with the intention of reducing the differences between high-resource and low-resource languages at the character level, we introduce a unified transliteration method for various language families, which are both semantically and syntactically highly analogous with each other. Experiments on low-resource datasets show that our approaches are effective, significantly outperform the state-of-the-art methods, and yield improvements of up to 5.63 BLEU points.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "38", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ihasz:2019:SFS, author = "Peter Lajos Ihasz and Mate Kovacs and Ian Piumarta and Victor V. Kryssanov", title = "A Supplementary Feature Set for Sentiment Analysis in {Japanese} Dialogues", journal = j-TALLIP, volume = "18", number = "4", pages = "39:1--39:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3310283", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3310283", abstract = "Recently, real-time affect-awareness has been applied in several commercial systems, such as dialogue systems and computer games. Real-time recognition of affective states, however, requires the application of costly feature extraction methods and/or labor-intensive annotation of large datasets, especially in the case of Asian languages where large annotated datasets are seldom available. To improve recognition accuracy, we propose the use of cognitive context in the form of ``emotion-sensitive'' intentions. 
Intentions are often represented through dialogue acts and, as an emotion-sensitive model of dialogue acts, a tagset of interpersonal-relations-directing interpersonal acts (the IA model) is proposed. The model's adequacy is assessed using a sentiment classification task in comparison with two well-known dialogue act models, the SWBD-DAMSL and the DIT++. For the assessment, five Japanese in-game dialogues were annotated with labels of sentiments and the tags of all three dialogue act models which were used to enhance a baseline sentiment classifier system. The adequacy of the IA tagset is demonstrated by a 9\% improvement to the baseline sentiment classifier's recognition accuracy, outperforming the other two models by more than 5\%.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "39", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Saeed:2019:SAC, author = "Ali Saeed and Rao Muhammad Adeel Nawab and Mark Stevenson and Paul Rayson", title = "A Sense Annotated Corpus for All-Words {Urdu} Word Sense Disambiguation", journal = j-TALLIP, volume = "18", number = "4", pages = "40:1--40:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314940", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314940", abstract = "Word Sense Disambiguation (WSD) aims to automatically predict the correct sense of a word used in a given context. All human languages exhibit word sense ambiguity, and resolving this ambiguity can be difficult. Standard benchmark resources are required to develop, compare, and evaluate WSD techniques. 
These are available for many languages, but not for Urdu, despite this being a language with more than 300 million speakers and large volumes of text available digitally. To fill this gap, this study proposes a novel benchmark corpus for the Urdu All-Words WSD task. The corpus contains 5,042 words of Urdu running text in which all ambiguous words (856 instances) are manually tagged with senses from the Urdu Lughat dictionary. A range of baseline WSD models based on n-gram are applied to the corpus, and the best performance (accuracy of 57.71\%) is achieved using word 4-gram. The corpus is freely available to the research community to encourage further WSD research in Urdu.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "40", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Dahou:2019:MCE, author = "Abdelghani Dahou and Shengwu Xiong and Junwei Zhou and Mohamed Abd Elaziz", title = "Multi-Channel Embedding Convolutional Neural Network Model for {Arabic} Sentiment Classification", journal = j-TALLIP, volume = "18", number = "4", pages = "41:1--41:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314941", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314941", abstract = "With the advent of social network services, Arabs' opinions on the web have attracted many researchers in recent years toward detecting and classifying sentiments in Arabic tweets and reviews. However, the impact of word embeddings vectors (WEVs) initialization and dataset balance on Arabic sentiment classification using deep learning has not been thoroughly studied.
In this article, a multi-channel embedding convolutional neural network (MCE-CNN) is proposed to improve Arabic sentiment classification by learning sentiment features from different text domains, word, and character n-grams levels. MCE-CNN encodes a combination of different pre-trained word embeddings into the embedding block at each embedding channel and trains these channels in parallel. Besides, a separate feature extraction module implemented in a CNN block is used to extract more relevant sentiment features. These channels and blocks help to start training on high-quality WEVs and fine-tuning them. The performance of MCE-CNN is evaluated on several standard balanced and imbalanced datasets to reflect real-world use cases. Experimental results show that MCE-CNN provides a high classification accuracy and benefits from the second embedding channel on both standard Arabic and dialectal Arabic text, which outperforms state-of-the-art methods.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "41", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Onyenwe:2019:TEI, author = "Ikechukwu E. 
Onyenwe and Mark Hepple and Uchechukwu Chinedu and Ignatius Ezeani", title = "Toward an Effective {Igbo} Part-of-Speech Tagger", journal = j-TALLIP, volume = "18", number = "4", pages = "42:1--42:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314942", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314942", abstract = "Part-of-speech (POS) tagging is a well-established technology for most Western European languages and a few other world languages, but it has not been evaluated on Igbo, an agglutinative African language. This article presents POS tagging experiments conducted using an Igbo corpus as a test bed for identifying the POS taggers and the Machine Learning (ML) methods that can achieve a good performance with the small dataset available for the language. Experiments have been conducted using different well-known POS taggers developed for English or European languages, and different training data styles and sizes. Igbo has a number of language-specific characteristics that present a challenge for effective POS tagging. One interesting case is the wide use of verbs (and nominalizations thereof) that have an inherent noun complement, which form ``linked pairs'' in the POS tagging scheme, but which may appear discontinuously. Another issue is Igbo's highly productive agglutinative morphology, which can produce many variant word forms from a given root. This productivity is a key cause of the out-of-vocabulary (OOV) words observed during Igbo tagging. We report results of experiments on a promising direction for improving tagging performance on such morphologically-inflected OOV words.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "42", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Costa-Jussa:2019:CCN, author = "Marta R. Costa-Juss{\`a} and No{\'e} Casas and Carlos Escolano and Jos{\'e} A. R. Fonollosa", title = "{Chinese--Catalan}: a Neural Machine Translation Approach Based on Pivoting and Attention Mechanisms", journal = j-TALLIP, volume = "18", number = "4", pages = "43:1--43:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3312575", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3312575", abstract = "This article innovatively addresses machine translation from Chinese to Catalan using neural pivot strategies trained without any direct parallel data. The Catalan language is very similar to Spanish from a linguistic point of view, which motivates the use of Spanish as pivot language. Regarding neural architecture, we are using the latest state-of-the-art, which is the Transformer model, only based on attention mechanisms. Additionally, this work provides new resources to the community, which consists of a human-developed gold standard of 4,000 sentences between Catalan and Chinese and all the other United Nations official languages (Arabic, English, French, Russian, and Spanish). Results show that the standard pseudo-corpus or synthetic pivot approach performs better than cascade.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "43", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yu:2019:MTE, author = "Hui Yu and Weizhi Xu and Shouxun Lin and Qun Liu", title = "Machine Translation Evaluation Metric Based on Dependency Parsing Model", journal = j-TALLIP, volume = "18", number = "4", pages = "44:1--44:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3312573", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3312573", abstract = "Most of the syntax-based metrics obtain the similarity by comparing the sub-structures extracted from the trees of hypothesis and reference. These sub-structures cannot represent all the information in the trees because their lengths are limited. To sufficiently use the reference syntax information, a new automatic evaluation metric is proposed based on the dependency parsing model. First, a dependency parsing model is trained using the reference dependency tree for each sentence. Then, the hypothesis is parsed by this dependency parsing model and the corresponding hypothesis dependency tree is generated. The quality of hypothesis can be judged by the quality of the hypothesis dependency tree. Unigram F-score is included in the new metric so that lexicon similarity is obtained. According to experimental results, the proposed metric can perform better than METEOR and BLEU on system level and get comparable results with METEOR on sentence level. To further improve the performance, we also propose a combined metric which gets the best performance on the sentence level and on the system level.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "44", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2019:EBC, author = "Yang Liu and Shaonan Wang and Jiajun Zhang and Chengqing Zong", title = "Experience-based Causality Learning for Intelligent Agents", journal = j-TALLIP, volume = "18", number = "4", pages = "45:1--45:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3314943", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3314943", abstract = "Understanding causality in text is crucial for intelligent agents. In this article, inspired by human causality learning, we propose an experience-based causality learning framework. Comparing to traditional approaches, which attempt to handle the causality problem relying on textual clues and linguistic resources, we are the first to use experience information for causality learning. Specifically, we first construct various scenarios for intelligent agents, thus, the agents can gain experience from interaction in these scenarios. Then, human participants build a number of training instances for agents of causality learning based on these scenarios. Each instance contains two sentences and a label. Each sentence describes an event that an agent experienced in a scenario, and the label indicates whether the sentence (event) pair has a causal relation. Accordingly, we propose a model that can infer the causality in text using experience by accessing the corresponding event information based on the input sentence pair. Experiment results show that our method can achieve impressive performance on the grounded causality corpus and significantly outperform the conventional approaches. 
Our work suggests that experience is very important for intelligent agents to understand causality.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "45", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yin:2019:PTE, author = "Yongjing Yin and Jinsong Su and Huating Wen and Jiali Zeng and Yang Liu and Yidong Chen", title = "{POS} Tag-enhanced Coarse-to-fine Attention for Neural Machine Translation", journal = j-TALLIP, volume = "18", number = "4", pages = "46:1--46:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3321124", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3321124", abstract = "Although neural machine translation (NMT) has certain capability to implicitly learn semantic information of sentences, we explore and show that Part-of-Speech (POS) tags can be explicitly incorporated into the attention mechanism of NMT effectively to yield further improvements. In this article, we propose an NMT model with tag-enhanced attention mechanism. In our model, NMT and POS tagging are jointly modeled via multi-task learning. Besides following common practice to enrich encoder annotations by introducing predicted source POS tags, we exploit predicted target POS tags to refine attention model in a coarse-to-fine manner. Specifically, we first implement a coarse attention operation solely on source annotations and target hidden state, where the produced context vector is applied to update target hidden state used for target POS tagging. Then, we perform a fine attention operation that extends the coarse one by further exploiting the predicted target POS tags. 
Finally, we facilitate word prediction by simultaneously utilizing the context vector from fine attention and the predicted target POS tags. Experimental results and further analyses on Chinese--English and Japanese--English translation tasks demonstrate the superiority of our proposed model over the conventional NMT models. We release our code at https://github.com/middlekisser/PEA-NMT.git.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "46",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Yang:2019:MEA,
  author = "Jun Yang and Runqi Yang and Hengyang Lu and Chongjun Wang and Junyuan Xie",
  title = "Multi-Entity Aspect-Based Sentiment Analysis with Context, Entity, Aspect Memory and Dependency Information",
  journal = j-TALLIP,
  volume = "18",
  number = "4",
  pages = "47:1--47:??",
  month = aug,
  year = "2019",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3321125",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Oct 2 10:34:33 MDT 2019",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/ft_gateway.cfm?id=3321125",
  abstract = "Fine-grained sentiment analysis is a useful tool for producers to understand consumers' needs as well as complaints about products and related aspects from online platforms. In this article, we define a novel task named ``Multi-Entity Aspect-Based Sentiment Analysis (ME-ABSA)''. It investigates the sentiment towards entities and their related aspects. It makes the well-studied aspect-based sentiment analysis a special case of this type, where the number of entities is limited to one. We contribute a new dataset for this task, with multi-entity Chinese posts in it. We propose to model context, entity, and aspect memory to address the task and incorporate dependency information for further improvement.
Experiments show that our methods perform significantly better than baseline methods on datasets for both ME-ABSA task and ABSA task. The in-depth analysis further validates the effectiveness of our methods and shows that our methods are capable of generalizing to new (entity, aspect) combinations with little loss of accuracy. This observation indicates that data annotation in real applications can be largely simplified.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "47", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kim:2019:MTS, author = "Hyun Kim and Jong-Hyeok Lee and Seung-Hoon Na", title = "Multi-task Stack Propagation for Neural Quality Estimation", journal = j-TALLIP, volume = "18", number = "4", pages = "48:1--48:??", month = aug, year = "2019", CODEN = "????", DOI = "https://doi.org/10.1145/3321127", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Oct 2 10:34:33 MDT 2019", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/ft_gateway.cfm?id=3321127", abstract = "Quality estimation is an important task in machine translation that has attracted increased interest in recent years. A key problem in translation-quality estimation is the lack of a sufficient amount of the quality annotated training data. To address this shortcoming, the Predictor-Estimator was proposed recently by introducing ``word prediction'' as an additional pre-subtask that predicts a current target word with consideration of surrounding source and target contexts, resulting in a two-stage neural model composed of a predictor and an estimator. However, the original Predictor-Estimator is not trained on a continuous stacking model but instead in a cascaded manner that separately trains the predictor from the estimator. 
In addition, the Predictor-Estimator is trained based on single-task learning only, which uses target-specific quality-estimation data without using other training data that are available from other-level quality-estimation tasks. In this article, we thus propose a multi-task stack propagation, which extensively applies stack propagation to fully train the Predictor-Estimator on a continuous stacking architecture and multi-task learning to enhance the training data from related other-level quality-estimation tasks. Experimental results on WMT17 quality-estimation datasets show that the Predictor-Estimator trained with multi-task stack propagation provides statistically significant improvements over the baseline models. In particular, under an ensemble setting, the proposed multi-task stack propagation leads to state-of-the-art performance at all the sentence/word/phrase levels for WMT17 quality estimation tasks.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.",
  articleno = "48",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2020:GCL,
  author = "Hongmin Wang and Jie Yang and Yue Zhang",
  title = "From {Genesis} to {Creole} Language: Transfer Learning for {Singlish} Universal Dependencies Parsing and {POS} Tagging",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "1:1--1:29",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3321128",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3321128",
  abstract = "Singlish can be interesting to the computational linguistics community both linguistically, as a major low-resource creole based on English, and computationally, for information extraction and sentiment analysis of regional social media. In our \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "1",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Kong:2020:CZP,
  author = "Fang Kong and Min Zhang and Guodong Zhou",
  title = "{Chinese} Zero Pronoun Resolution: a Chain-to-chain Approach",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "2:1--2:21",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3321129",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3321129",
  abstract = "Chinese zero pronoun (ZP) resolution plays a critical role in discourse analysis.
Different from traditional mention-to-mention approaches, this article proposes a chain-to-chain approach to improve the performance of ZP resolution in three aspects. \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "2",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Yin:2020:CZP,
  author = "Qingyu Yin and Weinan Zhang and Yu Zhang and Ting Liu",
  title = "{Chinese} Zero Pronoun Resolution: a Collaborative Filtering-based Approach",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "3:1--3:20",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3325884",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3325884",
  abstract = "Semantic information that has been proven to be necessary to the resolution of common noun phrases is typically ignored by most existing Chinese zero pronoun resolvers. This is because that zero pronouns convey no descriptive information, which makes it \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "3",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Das:2020:TCT,
  author = "Ayan Das and Sudeshna Sarkar",
  title = "Transform, Combine, and Transfer: Delexicalized Transfer Parser for Low-resource Languages",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "4:1--4:30",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3325886",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3325886",
  abstract = "Transfer parsing has been used for developing dependency parsers for languages with no treebank by using transfer from treebanks of other languages (source languages). In delexicalized transfer, parsed words are replaced by their part-of-speech tags. \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "4",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Ding:2020:TBM,
  author = "Chenchen Ding and Hnin Thu Zar Aye and Win Pa Pa and Khin Thandar Nwet and Khin Mar Soe and Masao Utiyama and Eiichiro Sumita",
  title = "Towards {Burmese} ({Myanmar}) Morphological Analysis: Syllable-based Tokenization and Part-of-speech Tagging",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "5:1--5:34",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3325885",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3325885",
  abstract = "This article presents a comprehensive study on two primary tasks in Burmese (Myanmar) morphological analysis: tokenization and part-of-speech (POS) tagging. Twenty thousand Burmese sentences of newswire are annotated with two-layer tokenization and POS-\ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "5",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2020:AMC,
  author = "Dayiheng Liu and Kexin Yang and Qian Qu and Jiancheng Lv",
  title = "Ancient--Modern {Chinese} Translation with a New Large Training Dataset",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "6:1--6:13",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3325887",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3325887",
  abstract = "Ancient Chinese brings the wisdom and spirit culture of the Chinese nation. Automatic translation from ancient Chinese to modern Chinese helps to inherit and carry forward the quintessence of the ancients. However, the lack of large-scale parallel \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "6",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2020:CSP,
  author = "Wei Wang and Degen Huang and Jingxiang Cao",
  title = "{Chinese} Syntax Parsing Based on Sliding Match of Semantic String",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "7:1--7:14",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3329707",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3329707",
  abstract = "Different from the current syntax parsing based on deep learning, we present a novel Chinese parsing method, which is based on Sliding Match of Semantic String (SMOSS).
(1) Training stage: In a treebank, headwords of tree nodes are represented by \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "7",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Kanwal:2020:UNE,
  author = "Safia Kanwal and Kamran Malik and Khurram Shahzad and Faisal Aslam and Zubair Nawaz",
  title = "{Urdu} Named Entity Recognition: Corpus Generation and Deep Learning Applications",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "8:1--8:13",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3329710",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3329710",
  abstract = "Named Entity Recognition (NER) plays a pivotal role in various natural language processing tasks, such as machine translation and automatic question-answering systems. Recognizing the importance of NER, a plethora of NER techniques for Western and Asian \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "8",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2020:DCW,
  author = "Yijia Liu and Wanxiang Che and Yuxuan Wang and Bo Zheng and Bing Qin and Ting Liu",
  title = "Deep Contextualized Word Embeddings for Universal Dependency Parsing",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "9:1--9:17",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3326497",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3326497",
  abstract = "Deep contextualized word embeddings (Embeddings from Language Model, short for ELMo), as an emerging and effective replacement for the static word embeddings, have achieved success on a bunch of syntactic and semantic NLP problems. However, little is \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "9",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Mehmood:2020:SAR,
  author = "Khawar Mehmood and Daryl Essam and Kamran Shafi and Muhammad Kamran Malik",
  title = "Sentiment Analysis for a Resource Poor Language --- {Roman Urdu}",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "10:1--10:15",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3329709",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3329709",
  abstract = "Sentiment analysis is an important sub-task of Natural Language Processing that aims to determine the polarity of a review.
Most of the work done on sentiment analysis is for the resource-rich languages of the world, but very limited work has been done \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "10",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Bakhshaei:2020:MGM,
  author = "Somayeh Bakhshaei and Reza Safabakhsh and Shahram Khadivi",
  title = "Matching Graph, a Method for Extracting Parallel Information from Comparable Corpora",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "11:1--11:29",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3329713",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3329713",
  abstract = "Comparable corpora are valuable alternatives for the expensive parallel corpora. They comprise informative parallel fragments that are useful resources for different natural language processing tasks. In this work, a generative model is proposed for \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "11",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2020:FTV,
  author = "Dayiheng Liu and Yang Xue and Feng He and Yuanyuan Chen and Jiancheng Lv",
  title = "{$ \mu $}-Forcing: Training Variational Recurrent Autoencoders for Text Generation",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "12:1--12:17",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3341110",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3341110",
  abstract = "It has been previously observed that training Variational Recurrent Autoencoders (VRAE) for text generation suffers from serious uninformative latent variables problems. The model would collapse into a plain language model that totally ignores the \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "12",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Srivastava:2020:AMA,
  author = "Jyoti Srivastava and Sudip Sanyal and Ashish Kumar Srivastava",
  title = "An Automatic and a Machine-assisted Method to Clean Bilingual Corpus",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "13:1--13:19",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3342351",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3342351",
  abstract = "Two different methods of corpus cleaning are presented in this article.
One is a machine-assisted technique, which is good to clean small-sized parallel corpus, and the other is an automatic method, which is suitable for cleaning large-sized parallel \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "13",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Prakash:2020:ISP,
  author = "Jeena J. Prakash and Golda Brunet Rajan and Hema A. Murthy",
  title = "Importance of Signal Processing Cues in Transcription Correction for Low-Resource {Indian} Languages",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "14:1--14:26",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3342352",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3342352",
  abstract = "Accurate phonetic transcriptions are crucial for building robust acoustic models for speech recognition as well as speech synthesis applications. Phonetic transcriptions are not usually provided with speech corpora. A lexicon is used to generate phone-\ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "14",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Han:2020:EMW,
  author = "Dong Han and Junhui Li and Yachao Li and Min Zhang and Guodong Zhou",
  title = "Explicitly Modeling Word Translations in Neural Machine Translation",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "15:1--15:17",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3342353",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3342353",
  abstract = "In this article, we show that word translations can be explicitly incorporated into NMT effectively to avoid wrong translations. Specifically, we propose three cross-lingual encoders to explicitly incorporate word translations into NMT: (1) Factored\ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "15",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Chakrabarty:2020:NNM,
  author = "Abhisek Chakrabarty and Akshay Chaturvedi and Utpal Garain",
  title = "{NeuMorph}: Neural Morphological Tagging for Low-Resource Languages --- an Experimental Study for {Indic} Languages",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "16:1--16:19",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3342354",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3342354",
  abstract = "This article deals with morphological tagging for low-resource languages.
For this purpose, five Indic languages are taken as reference. In addition, two severely resource-poor languages, Coptic and Kurmanji, are also considered. The task entails \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "16",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Ji:2020:ATU,
  author = "Yatu Ji and Hongxu Hou and Junjie Chen and Nier Wu",
  title = "Adversarial Training for Unknown Word Problems in Neural Machine Translation",
  journal = j-TALLIP,
  volume = "19",
  number = "1",
  pages = "17:1--17:12",
  month = jan,
  year = "2020",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3342482",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 10 08:11:41 MST 2020",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/abs/10.1145/3342482",
  abstract = "Nearly all of the work in neural machine translation (NMT) is limited to a quite restricted vocabulary, crudely treating all other words the same as an unk symbol. For the translation of language with abundant morphology, unknown (UNK) words also \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "17", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhu:2020:OSK, author = "Qingfu Zhu and Weinan Zhang and Lei Cui and Ting Liu", title = "Order-Sensitive Keywords Based Response Generation in Open-Domain Conversational Systems", journal = j-TALLIP, volume = "19", number = "2", pages = "18:1--18:18", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3343258", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3343258", abstract = "External keywords are crucial for response generation models to address the generic response problems in open-domain conversational systems. The occurrence of keywords in a response depends heavily on the order of the keywords as they are generated \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "18", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhou:2020:NCG, author = "Guangyou Zhou and Yizhen Fang and Yehong Peng and Jiaheng Lu", title = "Neural Conversation Generation with Auxiliary Emotional Supervised Models", journal = j-TALLIP, volume = "19", number = "2", pages = "19:1--19:17", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3344788", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3344788", abstract = "An important aspect of developing dialogue agents involves endowing a conversation system with emotion perception and interaction. 
Most existing emotion dialogue models lack the adaptability and extensibility of different scenes because of their \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "19", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhu:2020:EDC, author = "Wenhao Zhu and Xin Jin and Shuang Liu and Zhiguo Lu and Wu Zhang and Ke Yan and Baogang Wei", title = "Enhanced Double-Carrier Word Embedding via Phonetics and Writing", journal = j-TALLIP, volume = "19", number = "2", pages = "20:1--20:18", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3344920", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3344920", abstract = "Word embeddings, which map words into a unified vector space, capture rich semantic information. From a linguistic point of view, words have two carriers, speech and writing. Yet the most recent word embedding models focus on only the writing carrier \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "20", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Dehkharghani:2020:SPP, author = "Rahim Dehkharghani", title = "{SentiFars}: a {Persian} Polarity Lexicon for Sentiment Analysis", journal = j-TALLIP, volume = "19", number = "2", pages = "21:1--21:12", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3345627", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3345627", abstract = "There is no doubt about the usefulness of public opinion toward different issues in social media and the World Wide Web. Extracting the feelings of people about an issue from text is not straightforward. Polarity lexicons that assign polarity tags or \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "21", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Abdulhameed:2020:WVT, author = "Tiba Zaki Abdulhameed and Imed Zitouni and Ikhlas Abdel-Qader", title = "{Wasf-Vec}: Topology-based Word Embedding for Modern Standard {Arabic} and {Iraqi} Dialect Ontology", journal = j-TALLIP, volume = "19", number = "2", pages = "22:1--22:27", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3345517", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3345517", abstract = "Word clustering is a serious challenge in low-resource languages. 
Since words that share semantics are expected to be clustered together, it is common to use a feature vector representation generated from a distributional theory-based word embedding \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "22", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Xu:2020:EPS, author = "Ge Xu and Xiaoyan Yang and Yuanzheng Cai and Zhiqiang Ruan and Tao Wang and Xiangwen Liao", title = "Extracting Polarity Shifting Patterns from Any Corpus Based on Natural Annotation", journal = j-TALLIP, volume = "19", number = "2", pages = "23:1--23:16", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3345518", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3345518", abstract = "In recent years, online sentiment texts are generated by users in various domains and in different languages. Binary polarity classification (positive or negative) on business sentiment texts can help both companies and customers to evaluate products or \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "23", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Imankulova:2020:FPP, author = "Aizhan Imankulova and Takayuki Sato and Mamoru Komachi", title = "Filtered Pseudo-parallel Corpus Improves Low-resource Neural Machine Translation", journal = j-TALLIP, volume = "19", number = "2", pages = "24:1--24:16", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3341726", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3341726", abstract = "Large-scale parallel corpora are essential for training high-quality machine translation systems; however, such corpora are not freely available for many language translation pairs. Previously, training data has been augmented by pseudo-parallel corpora \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "24", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Gupta:2020:DNN, author = "Deepak Gupta and Asif Ekbal and Pushpak Bhattacharyya", title = "A Deep Neural Network Framework for {English} {Hindi} Question Answering", journal = j-TALLIP, volume = "19", number = "2", pages = "25:1--25:22", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3359988", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3359988", abstract = "In this article, we propose a unified deep neural network framework for multilingual question answering (QA). 
The proposed network deals with the multilingual questions and answers snippets. The input to the network is a pair of factoid question and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "25", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yu:2020:LWT, author = "Hongfei Yu and Xiaoqing Zhou and Xiangyu Duan and Min Zhang", title = "Layer-Wise De-Training and Re-Training for {ConvS2S} Machine Translation", journal = j-TALLIP, volume = "19", number = "2", pages = "26:1--26:15", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3358414", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3358414", abstract = "The convolutional sequence-to-sequence (ConvS2S) machine translation system is one of the typical neural machine translation (NMT) systems. Training the ConvS2S model tends to get stuck in a local optimum in our pre-studies. To overcome this inferior \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "26", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Somsap:2020:IDW, author = "Sittichai Somsap and Pusadee Seresangtakul", title = "{Isarn Dharma} Word Segmentation Using a Statistical Approach with Named Entity Recognition", journal = j-TALLIP, volume = "19", number = "2", pages = "27:1--27:16", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3359990", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3359990", abstract = "In this study, we developed an Isarn Dharma word segmentation system. We mainly focused on solving the word ambiguity and unknown word problems in unsegmented Isarn Dharma text. Ambiguous Isarn Dharma words occur frequently in word construction due to \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "27", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Abbas:2020:PIR, author = "Muhammad Raihan Abbas and Khadim Hussain Asif", title = "{Punjabi} to {ISO 15919} and {Roman} Transliteration with Phonetic Rectification", journal = j-TALLIP, volume = "19", number = "2", pages = "28:1--28:20", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3359991", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3359991", abstract = "Transliteration removes the script barriers. 
Unfortunately, Punjabi is written in four different scripts, i.e., Gurmukhi, Shahmukhi, Devnagri, and Latin. The Latin script is understandable for nearly all factions of the Punjabi community. The objective \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "28", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Beseiso:2020:SAM, author = "Majdi Beseiso and Haytham Elmousalami", title = "Subword Attentive Model for {Arabic} Sentiment Analysis: a Deep Learning Approach", journal = j-TALLIP, volume = "19", number = "2", pages = "29:1--29:17", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3360016", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3360016", abstract = "Social media data is unstructured data where these big data are exponentially increasing day to day in many different disciplines. Analysis and understanding the semantics of these data are a big challenge due to its variety and huge volume. To address \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "29", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Harikrishna:2020:CSC, author = "D. M. Harikrishna and K. 
Sreenivasa Rao", title = "{Children}'s Story Classification in {Indian} Languages Using Linguistic and Keyword-based Features", journal = j-TALLIP, volume = "19", number = "2", pages = "30:1--30:22", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3342356", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3342356", abstract = "The primary objective of this work is to classify Hindi and Telugu stories into three genres: fable, folk-tale, and legend. In this work, we are proposing a framework for story classification (SC) using keyword and part-of-speech (POS) features. For \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "30", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jung:2020:WRT, author = "Hun-Young Jung and Jong-Hyeok Lee and Eunju Min and Seung-Hoon Na", title = "Word Reordering for Translation into {Korean} Sign Language Using Syntactically-guided Classification", journal = j-TALLIP, volume = "19", number = "2", pages = "31:1--31:20", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3357612", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3357612", abstract = "Machine translation aims to break the language barrier that prevents communication with others and increase access to information. Deaf people face huge language barriers in their daily lives, including access to digital and spoken information. There \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. 
Inf. Process.", articleno = "31", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Masmoudi:2020:TAA, author = "Abir Masmoudi and Mariem Ellouze Khmekhem and Mourad Khrouf and Lamia Hadrich Belguith", title = "Transliteration of {Arabizi} into {Arabic} Script for {Tunisian} Dialect", journal = j-TALLIP, volume = "19", number = "2", pages = "32:1--32:21", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3364319", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3364319", abstract = "The evolution of information and communication technology has markedly influenced communication between correspondents. This evolution has facilitated the transmission of information and has engendered new forms of written communication (email, chat, \ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "32", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mukherjee:2020:FST, author = "Subham Mukherjee and Pradeep Kumar and Partha Pratim Roy", title = "Fusion of Spatio-temporal Information for {Indic} Word Recognition Combining Online and Offline Text Data", journal = j-TALLIP, volume = "19", number = "2", pages = "33:1--33:24", month = mar, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3364533", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:05:40 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3364533", abstract = "We present a novel Indic handwritten word recognition scheme by fusion of spatio-temporal information extracted from handwritten images. The main challenge in Indic word recognition lies in its complexity because of modifiers, touching characters, and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "33", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yu:2020:ELR, author = "Zhiqiang Yu and Zhengtao Yu and Junjun Guo and Yuxin Huang and Yonghua Wen", title = "Efficient Low-Resource Neural Machine Translation with Reread and Feedback Mechanism", journal = j-TALLIP, volume = "19", number = "3", pages = "34:1--34:13", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365244", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365244", abstract = "How to utilize information sufficiently is a key problem in neural machine translation (NMT), which is effectively improved in rich-resource NMT by leveraging large-scale bilingual sentence pairs. However, for low-resource NMT, lack of bilingual \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "34", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Park:2020:NSB, author = "Cheoneum Park and Heejun Song and Changki Lee", title = "{$ S^3$-NET}: {SRU}-Based Sentence and Self-Matching Networks for Machine Reading Comprehension", journal = j-TALLIP, volume = "19", number = "3", pages = "35:1--35:14", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365679", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365679", abstract = "Machine reading comprehension question answering (MRC-QA) is the task of understanding the context of a given passage to find a correct answer within it. A passage is composed of several sentences; therefore, the length of the input sentence becomes \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "35", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sarwar:2020:SSF, author = "Raheem Sarwar and Thanasarn Porthaveepong and Attapol Rutherford and Thanawin Rakthanmanon and Sarana Nutanong", title = "{StyloThai}: a Scalable Framework for Stylometric Authorship Identification of {Thai} Documents", journal = j-TALLIP, volume = "19", number = "3", pages = "36:1--36:15", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365832", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365832", abstract = "Authorship identification helps to identify the true author of a given anonymous document from a set of candidate authors. The applications of this task can be found in several domains, such as law enforcement agencies and information retrieval. These \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "36", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kim:2020:UIB, author = "Hyun Kim and Seung-Hoon Na", title = "Uniformly Interpolated Balancing for Robust Prediction in Translation Quality Estimation: a Case Study of {English--Korean} Translation", journal = j-TALLIP, volume = "19", number = "3", pages = "37:1--37:27", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3365916", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3365916", abstract = "There has been growing interest among researchers in quality estimation (QE), which attempts to automatically predict the quality of machine translation (MT) outputs. Most existing works on QE are based on supervised approaches using quality-annotated \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "37", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhou:2020:LMU, author = "Xiao Zhou and Zhen-Hua Ling and Li-Rong Dai", title = "Learning and Modeling Unit Embeddings Using Deep Neural Networks for Unit-Selection-Based {Mandarin} Speech Synthesis", journal = j-TALLIP, volume = "19", number = "3", pages = "38:1--38:14", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372244", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372244", abstract = "A method of learning and modeling unit embeddings using deep neural networks (DNNs) is presented in this article for unit-selection-based Mandarin speech synthesis. Here, a unit embedding is defined as a fixed-length embedding vector for a phone-sized \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "38", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mirzaei:2020:SRL, author = "Azadeh Mirzaei and Fatemeh Sedghi and Pegah Safari", title = "Semantic Role Labeling System for {Persian} Language", journal = j-TALLIP, volume = "19", number = "3", pages = "39:1--39:12", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3372246", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3372246", abstract = "In this article, we present an automatic semantic role labeling system in Persian consisting of two modules: argument identification for specifying argument spans and argument classification for categorizing their semantic roles. Our modules have been \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "39", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ding:2020:BMT, author = "Chenchen Ding and Sann Su Su Yee and Win Pa Pa and Khin Mar Soe and Masao Utiyama and Eiichiro Sumita", title = "A {Burmese} ({Myanmar}) {Treebank}: Guideline and Analysis", journal = j-TALLIP, volume = "19", number = "3", pages = "40:1--40:13", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3373268", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3373268", abstract = "A 20,000-sentence Burmese (Myanmar) treebank on news articles has been released under a CC BY-NC-SA license. 
Complete phrase structure annotation was developed for each sentence from the morphologically annotated data prepared in previous work of Ding \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "40", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Song:2020:KPS, author = "Hyun-Je Song and Seong-Bae Park", title = "{Korean} Part-of-speech Tagging Based on Morpheme Generation", journal = j-TALLIP, volume = "19", number = "3", pages = "41:1--41:10", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3373608", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3373608", abstract = "Two major problems of Korean part-of-speech (POS) tagging are that the word-spacing unit is not mapped one-to-one to a POS tag and that morphemes should be recovered during POS tagging. Therefore, this article proposes a novel two-step Korean POS tagger \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "41", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mi:2020:LIL, author = "Chenggang Mi and Lei Xie and Yanning Zhang", title = "Loanword Identification in Low-Resource Languages with Minimal Supervision", journal = j-TALLIP, volume = "19", number = "3", pages = "43:1--43:22", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3374212", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3374212", abstract = "Bilingual resources play a very important role in many natural language processing tasks, especially the tasks in cross-lingual scenarios. However, it is expensive and time consuming to build such resources. Lexical borrowing happens in almost every \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "43", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2020:INM, author = "Yachao Li and Junhui Li and Min Zhang and Yixin Li and Peng Zou", title = "Improving Neural Machine Translation with Linear Interpolation of a Short-Path Unit", journal = j-TALLIP, volume = "19", number = "3", pages = "44:1--44:16", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3377851", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3377851", abstract = "In neural machine translation (NMT), the source and target words are at the two ends of a large deep neural network, normally mediated by a series of non-linear activations. The problem with such consequent non-linear activations is that they \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "44", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2020:DUK, author = "Xiao-Yang Liu and Yimeng Zhang and Yukang Liao and Ling Jiang", title = "Dynamic Updating of the Knowledge Base for a Large-Scale Question Answering System", journal = j-TALLIP, volume = "19", number = "3", pages = "45:1--45:13", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3377708", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3377708", abstract = "Today, the knowledge base question answering (KB-QA) system is promising to achieve a large-scale high-quality reply in the e-commerce industry. However, there exist two major challenges to efficiently support large-scale KB-QA systems. On the one hand, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "45", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2020:ELM, author = "Shih-Hung Liu and Kuan-Yu Chen and Berlin Chen", title = "Enhanced Language Modeling with Proximity and Sentence Relatedness Information for Extractive Broadcast News Summarization", journal = j-TALLIP, volume = "19", number = "3", pages = "46:1--46:19", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3377407", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3377407", abstract = "The primary task of extractive summarization is to automatically select a set of representative sentences from a text or spoken document that can concisely express the most important theme of the original document. Recently, language modeling (LM) has \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "46", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Du:2020:CNL, author = "Qianlong Du and Chengqing Zong and Keh-Yih Su", title = "Conducting Natural Language Inference with Word-Pair-Dependency and Local Context", journal = j-TALLIP, volume = "19", number = "3", pages = "47:1--47:23", month = feb, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3377704", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 3 09:11:26 MST 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3377704", abstract = "This article proposes to conduct natural language inference with novel Enhanced-Relation-Head-Dependent triplets (RHD triplets), which are constructed via enhancing each word in the RHD triplet with its associated local context. Most previous approaches \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "47", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zitouni:2020:ENE, author = "Imed Zitouni", title = "Editorial from the New {Editor-in-Chief}: the Era of Natural Language Processing Innovations on {Asian} and Low-Resource Languages", journal = j-TALLIP, volume = "19", number = "4", pages = "48e:1--48e:2", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3397501", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3397501", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "48e", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yang:2020:OEQ, author = "Jingxuan Yang and Haotian Cui and Si Li and Sheng Gao and Jun Guo and Zhengdong Lu", title = "Outline Extraction with Question-Specific Memory Cells", journal = j-TALLIP, volume = "19", number = "4", pages = "48:1--48:17", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3377707", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3377707", abstract = "Outline extraction has been widely applied in online consultation to help experts quickly understand individual cases. Given a specific case described as unstructured plain text, outline extraction aims to make a summary for this case by answering a set \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "48", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zarnoufi:2020:MNB, author = "Randa Zarnoufi and Hamid Jaafar and Mounia Abik", title = "Machine Normalization: Bringing Social Media Text from Non-Standard to Standard Form", journal = j-TALLIP, volume = "19", number = "4", pages = "49:1--49:30", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3378414", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3378414", abstract = "User-generated text in social media communication (SMC) is mainly characterized by non-standard form. 
It may contain code switching (CS) text, a widespread phenomenon in SMC, in addition to noisy elements used, especially in written conversations (use \ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "49", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bhattu:2020:ICM, author = "S. Nagesh Bhattu and Satya Krishna Nunna and D. V. L. N. Somayajulu and Binay Pradhan", title = "Improving Code-mixed {POS} Tagging Using Code-mixed Embeddings", journal = j-TALLIP, volume = "19", number = "4", pages = "50:1--50:31", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3380967", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3380967", abstract = "Social media data has become invaluable component of business analytics. A multitude of nuances of social media text make the job of conventional text analytical tools difficult. Code-mixing of text is a phenomenon prevalent among social media users, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "50", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ahmad:2020:NER, author = "Muhammad Tayyab Ahmad and Muhammad Kamran Malik and Khurram Shahzad and Faisal Aslam and Asif Iqbal and Zubair Nawaz and Faisal Bukhari", title = "Named Entity Recognition and Classification for {Punjabi Shahmukhi}", journal = j-TALLIP, volume = "19", number = "4", pages = "51:1--51:13", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3383306", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3383306", abstract = "Named entity recognition (NER) refers to the identification of proper nouns from natural language text and classifying them into named entity types, such as person, location, and organization. Due to the widespread applications of NER, numerous NER \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "51", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Garg:2020:HES, author = "Kanika Garg and D. K. 
Lobiyal", title = "{Hindi EmotionNet}: a Scalable Emotion Lexicon for Sentiment Classification of {Hindi} Text", journal = j-TALLIP, volume = "19", number = "4", pages = "52:1--52:35", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3383330", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3383330", abstract = "In this study, we create an emotion lexicon for the Hindi language called Hindi EmotionNet. It can assign emotional affinity to words in IndoWordNet. This lexicon contains 3,839 emotion words, with 1,246 positive and 2,399 negative words. We also \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "52", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Udomcharoenchaikit:2020:AER, author = "Can Udomcharoenchaikit and Prachya Boonkwan and Peerapon Vateekul", title = "Adversarial Evaluation of Robust Neural Sequential Tagging Methods for {Thai} Language", journal = j-TALLIP, volume = "19", number = "4", pages = "53:1--53:25", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3383201", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3383201", abstract = "Sequential tagging tasks, such as Part-Of-Speech (POS) tagging and Named-Entity Recognition, are the building blocks of many natural language processing applications. Although prior works have reported promising results in standard settings, they often \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "53", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sugandhi:2020:SLG, author = "Sugandhi and Parteek Kumar and Sanmeet Kaur", title = "Sign Language Generation System Based on {Indian} Sign Language Grammar", journal = j-TALLIP, volume = "19", number = "4", pages = "54:1--54:26", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3384202", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3384202", abstract = "Sign Language (SL), also known as gesture-based language, is used by people with hearing loss to convey their messages. SL interpreters are required for people who do not have the knowledge of SL, but interpreters are not readily available. Thus, a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "54", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sarwar:2020:NLI, author = "Raheem Sarwar and Attapol T. 
Rutherford and Saeed-Ul Hassan and Thanawin Rakthanmanon and Sarana Nutanong", title = "Native Language Identification of Fluent and Advanced Non-Native Writers", journal = j-TALLIP, volume = "19", number = "4", pages = "55:1--55:19", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3383202", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3383202", abstract = "Native Language Identification (NLI) aims at identifying the native languages of authors by analyzing their text samples written in a non-native language. Most existing studies investigate this task for educational applications such as second language \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "55", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Buyuk:2020:CDS, author = "Osman B{\"u}y{\"u}k", title = "Context-Dependent Sequence-to-Sequence {Turkish} Spelling Correction", journal = j-TALLIP, volume = "19", number = "4", pages = "56:1--56:16", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3383200", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3383200", abstract = "In this article, we make use of sequence-to-sequence (seq2seq) models for spelling correction in the agglutinative Turkish language. In the baseline system, misspelled and target words are split into their letters and the letter sequences are fed into \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Asian Low-Resour. Lang. Inf. Process.", articleno = "56", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Khalil:2020:EAC, author = "Hussein Khalil and Taha Osman and Mohammed Miltan", title = "Extracting {Arabic} Composite Names Using Genitive Principles of {Arabic} Grammar", journal = j-TALLIP, volume = "19", number = "4", pages = "57:1--57:16", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3382187", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3382187", abstract = "Named Entity Recognition (NER) is a basic prerequisite of using Natural Language Processing (NLP) for information retrieval. Arabic NER is especially challenging as the language is morphologically rich and has short vowels with no capitalisation \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "57", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2020:SCH, author = "Kexin Wang and Yu Zhou and Jiajun Zhang and Shaonan Wang and Chengqing Zong", title = "Structurally Comparative Hinge Loss for Dependency-Based Neural Text Representation", journal = j-TALLIP, volume = "19", number = "4", pages = "58:1--58:19", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3387633", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3387633", abstract = "Dependency-based graph convolutional networks (DepGCNs) are proven helpful for text representation to handle many natural language tasks. Almost all previous models are trained with cross-entropy (CE) loss, which maximizes the posterior likelihood \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "58", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2020:JME, author = "Maofu Liu and Yukun Zhang and Wenjie Li and Donghong Ji", title = "Joint Model of Entity Recognition and Relation Extraction with Self-attention Mechanism", journal = j-TALLIP, volume = "19", number = "4", pages = "59:1--59:19", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3387634", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3387634", abstract = "In recent years, the joint model of entity recognition (ER) and relation extraction (RE) has attracted more and more attention in the healthcare and medical domains. However, there are some problems with the prior work. The joint model cannot extract \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "59", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumar:2020:LGV, author = "H. R. Shiva Kumar and A. G. 
Ramakrishnan", title = "{Lipi Gnani}: a Versatile {OCR} for Documents in any Language Printed in {Kannada} Script", journal = j-TALLIP, volume = "19", number = "4", pages = "60:1--60:23", month = jul, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3387632", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jul 8 18:31:46 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/abs/10.1145/3387632", abstract = "A Kannada OCR, called Lipi Gnani, has been designed and developed from scratch, with the motivation of it being able to convert printed text or poetry in Kannada script, without any restriction on vocabulary. The training and test sets have been \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "60", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Hao:2020:CST, author = "Ming Hao and Bo Xu and Jing-Yi Liang and Bo-Wen Zhang and Xu-Cheng Yin", title = "{Chinese} Short Text Classification with Mutual-Attention Convolutional Neural Networks", journal = j-TALLIP, volume = "19", number = "5", pages = "61:1--61:13", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3388970", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3388970", abstract = "The methods based on the combination of word-level and character-level features can effectively boost performance on Chinese short text classification. A lot of works concatenate two-level features with little processing, which leads to losing feature \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "61", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Xu:2020:SDE, author = "Fan Xu and Jian Luo and Mingwen Wang and Guodong Zhou", title = "Speech-Driven End-to-End Language Discrimination toward {Chinese} Dialects", journal = j-TALLIP, volume = "19", number = "5", pages = "62:1--62:24", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3389021", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3389021", abstract = "Language discrimination among similar languages, varieties, and dialects is a challenging natural language processing task. The traditional text-driven focus leads to poor results. In this article, we explore the effectiveness of speech-driven features \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "62", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2020:IIF, author = "Junjie Chen and Hongxu Hou and Jing Gao", title = "Inside Importance Factors of Graph-Based Keyword Extraction on {Chinese} Short Text", journal = j-TALLIP, volume = "19", number = "5", pages = "63:1--63:15", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3388971", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3388971", abstract = "Keywords are considered to be important words in the text and can provide a concise representation of the text. 
With the surge of unlabeled short text on the Internet, automatic keyword extraction task has proven useful in other information processing \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "63", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lou:2020:EBS, author = "Yinxia Lou and Yue Zhang and Fei Li and Tao Qian and Donghong Ji", title = "Emoji-Based Sentiment Analysis Using Attention Networks", journal = j-TALLIP, volume = "19", number = "5", pages = "64:1--64:13", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3389035", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3389035", abstract = "Emojis are frequently used to express moods, emotions, and feelings in social media. There has been much research on emojis and sentiments. However, existing methods mainly face two limitations. First, they treat emojis as binary indicator features and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "64", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhou:2020:DNN, author = "Long Zhou and Jiajun Zhang and Xiaomian Kang and Chengqing Zong", title = "Deep Neural Network--based Machine Translation System Combination", journal = j-TALLIP, volume = "19", number = "5", pages = "65:1--65:19", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3389791", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3389791", abstract = "Deep neural networks (DNNs) have provably enhanced the state-of-the-art natural language process (NLP) with their capability of feature learning and representation. As one of the more challenging NLP tasks, neural machine translation (NMT) becomes a new \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "65", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ameur:2020:RAT, author = "Mohamed Seghir Hadj Ameur and Riadh Belkebir and Ahmed Guessoum", title = "Robust {Arabic} Text Categorization by Combining Convolutional and Recurrent Neural Networks", journal = j-TALLIP, volume = "19", number = "5", pages = "66:1--66:16", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3390092", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3390092", abstract = "Text Categorization is an important task in the area of Natural Language Processing (NLP). 
Its goal is to learn a model that can accurately classify any textual document for a given language into one of a set of predefined categories. In the context of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "66", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Das:2020:SMT, author = "Ayan Das and Sudeshna Sarkar", title = "A Survey of the Model Transfer Approaches to Cross-Lingual Dependency Parsing", journal = j-TALLIP, volume = "19", number = "5", pages = "67:1--67:60", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3383772", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3383772", abstract = "Cross-lingual dependency parsing approaches have been employed to develop dependency parsers for the languages for which little or no treebanks are available using the treebanks of other languages. A language for which the cross-lingual parser is \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "67", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Marie:2020:ITU, author = "Benjamin Marie and Atsushi Fujita", title = "Iterative Training of Unsupervised Neural and Statistical Machine Translation Systems", journal = j-TALLIP, volume = "19", number = "5", pages = "68:1--68:21", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3389790", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3389790", abstract = "Recent work achieved remarkable results in training neural machine translation (NMT) systems in a fully unsupervised way, with new and dedicated architectures that only rely on monolingual corpora. However, previous work also showed that unsupervised \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "68", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chimalamarri:2020:MSI, author = "Santwana Chimalamarri and Dinkar Sitaram and Ashritha Jain", title = "Morphological Segmentation to Improve Crosslingual Word Embeddings for Low Resource Languages", journal = j-TALLIP, volume = "19", number = "5", pages = "69:1--69:15", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3390298", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3390298", abstract = "Crosslingual word embeddings developed from multiple parallel corpora help in understanding the relationships between languages and improving the prediction quality of machine translation. However, in low resource languages with complex and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "69", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2020:PQA, author = "Ying Li and Jizhou Huang and Miao Fan and Jinyi Lei and Haifeng Wang and Enhong Chen", title = "Personalized Query Auto-Completion for Large-Scale {POI} Search at {Baidu} Maps", journal = j-TALLIP, volume = "19", number = "5", pages = "70:1--70:16", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3394137", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3394137", abstract = "Query auto-completion (QAC) is a featured function that has been widely adopted by many sub-domains of search. It can dramatically reduce the number of typed characters and avoid spelling mistakes. These merits of QAC are highlighted to improve user satisfaction, especially when users intend to type in a query on mobile devices. In this article, we will present our industrial solution to the personalized QAC for the point of interest (POI) search at Baidu Maps, a well-known Web mapping service on mobiles in China. The industrial solution makes a good tradeoff between the offline effectiveness of a novel neural learning model that we devised for feature generation and the online efficiency of an off-the-shelf learning to rank (LTR) approach for the real-time suggestion. 
Besides some practical lessons from how a real-world QAC system is built and deployed in Baidu Maps to facilitate a large number of users in searching tens of millions of POIs, we mainly explore two specific features for the personalized QAC function of the POI search engine: the spatial-temporal characteristics of POIs and the historically queried POIs of individual users.\par We leverage the large-volume POI search logs in Baidu Maps to conduct offline evaluations of our personalized QAC model measured by multiple metrics, including Mean Reciprocal Rank (MRR), Success Rate (SR), and normalized Discounted Cumulative Gain (nDCG). Extensive experimental results demonstrate that the personalized model enhanced by the proposed features can achieve substantial improvements (i.e., +3.29\% MRR, +3.78\% SR@1, +5.17\% SR@3, +1.96\% SR@5, and +3.62\% nDCG@5). After deploying this upgraded model into the POI search engine at Baidu Maps for A/B testing online, we observe that some other critical indicators, such as the average number of keystrokes and the average typing speed at keystrokes in a QAC session, which are also related to user satisfaction, decrease as well by 1.37\% and 1.69\%, respectively. So the conclusion is that the two kinds of features contributed by us are quite helpful in personalized mapping services for industrial practice.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "70", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Alkhatib:2020:DLA, author = "Manar Alkhatib and Azza Abdel Monem and Khaled Shaalan", title = "Deep Learning for {Arabic} Error Detection and Correction", journal = j-TALLIP, volume = "19", number = "5", pages = "71:1--71:13", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3373266", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3373266", abstract = "Research on tools for automating the proofreading of Arabic text has received much attention in recent years. There is an increasing demand for applications that can detect and correct Arabic spelling and grammatical errors to improve the quality of Arabic text content and application input. Our review of previous studies indicates that few Arabic spell-checking research efforts appropriately address the detection and correction of ill-formed words that do not conform to the Arabic morphology system. Even fewer systems address the detection and correction of erroneous well-formed Arabic words that are either contextually or semantically inconsistent within the text. We introduce an approach that investigates employing deep neural network technology for error detection in Arabic text. We have developed a systematic framework for spelling and grammar error detection, as well as correction at the word level, based on a bidirectional long short-term memory mechanism and word embedding, in which a polynomial network classifier is at the top of the system. 
To get conclusive results, we have developed the most significant gold standard annotated corpus to date, containing 15 million fully inflected Arabic words. The data were collected from diverse text sources and genres, in which every erroneous and ill-formed word has been annotated, validated, and manually revised by Arabic specialists. This valuable asset is available for the Arabic natural language processing research community. The experimental results confirm that our proposed system significantly outperforms the performance of Microsoft Word 2013 and Open Office Ayaspell 3.4, which have been used in the literature for evaluating similar research.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "71", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Orhan:2020:LWV, author = "Umut Orhan and Enis Arslan", title = "Learning Word-vector Quantization: a Case Study in Morphological Disambiguation", journal = j-TALLIP, volume = "19", number = "5", pages = "72:1--72:18", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3397967", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3397967", abstract = "We introduced a new classifier named Learning Word-vector Quantization (LWQ) to solve morphological ambiguities in Turkish, which is an agglutinative language. First, a new and morphologically annotated corpus, and then its datasets are prepared with a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "72", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Dhar:2020:CSC, author = "Ankita Dhar and Himadri Mukherjee and Niladri Sekhar Dash and Kaushik Roy", title = "{CESS} --- a System to Categorize {Bangla} {Web} Text Documents", journal = j-TALLIP, volume = "19", number = "5", pages = "73:1--73:18", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3398070", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3398070", abstract = "Technology has evolved remarkably, which has led to an exponential increase in the availability of digital text documents of disparate domains over the Internet. This makes the retrieval of the information a very much time- and resource-consuming task. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "73", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bai:2020:NCT, author = "Ruirui Bai and Zhongqing Wang and Fang Kong and Shoushan Li and Guodong Zhou", title = "Neural Co-training for Sentiment Classification with Product Attributes", journal = j-TALLIP, volume = "19", number = "5", pages = "74:1--74:17", month = aug, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3394113", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Aug 28 11:52:49 MDT 2020", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3394113", abstract = "Sentiment classification aims to detect polarity from a piece of text. 
The polarity is usually positive or negative, and the text genre is usually product review. The challenges of sentiment classification are that it is hard to capture semantic of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "74", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Schmidt:2020:GTC, author = "Dirk Schmidt", title = "Grading {Tibetan} Children's Literature: a Test Case Using the {NLP} Readability Tool {``Dakje''}", journal = j-TALLIP, volume = "19", number = "6", pages = "75:1--75:19", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3392046", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3392046", abstract = "Worldwide, literacy is on the rise. This historically unprecedented surge --- especially over the past 200 years --- has changed nearly everything about the ancient technology of reading. Who reads is changing: Literacy is no longer just for elite, professional \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "75", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Habiba:2020:TCN, author = "Rabia Habiba and Muhammad Awais and Muhammad Shoaib", title = "A Technique to Calculate National Happiness Index by Analyzing {Roman Urdu} Messages Posted on Social Media", journal = j-TALLIP, volume = "19", number = "6", pages = "76:1--76:16", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3400712", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3400712", abstract = "National Happiness Index (NHI) is a national indicator of development that estimates the economic and social well-being of the nation's individuals. With the proliferation of the internet, people share a significant amount of data on social media \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "76", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2020:EFL, author = "Hao Wang and Qiongxing Tao and Siyuan Du and Xiangfeng Luo", title = "An Extensible Framework of Leveraging Syntactic Skeleton for Semantic Relation Classification", journal = j-TALLIP, volume = "19", number = "6", pages = "77:1--77:21", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3402885", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3402885", abstract = "Relation classification is one of the most fundamental upstream tasks in natural language processing and information extraction. State-of-the-art approaches make use of various deep neural networks (DNNs) to extract higher-level features directly. They \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "77", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Malhas:2020:ABR, author = "Rana Malhas and Tamer Elsayed", title = "{AyaTEC}: Building a Reusable Verse-Based Test Collection for {Arabic} Question Answering on the {Holy Qur'an}", journal = j-TALLIP, volume = "19", number = "6", pages = "78:1--78:21", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3400396", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3400396", abstract = "The absence of publicly available reusable test collections for Arabic question answering on the Holy Qur'an has impeded the possibility of fairly comparing the performance of systems in that domain. In this article, we introduce AyaTEC, a reusable test \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "78", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ruan:2020:CTV, author = "Yu-Ping Ruan and Zhen-Hua Ling and Xiaodan Zhu", title = "Condition-Transforming Variational Autoencoder for Generating Diverse Short Text Conversations", journal = j-TALLIP, volume = "19", number = "6", pages = "79:1--79:13", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3402884", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3402884", abstract = "In this article, conditional-transforming variational autoencoders (CTVAEs) are proposed for generating diverse short text conversations. In conditional variational autoencoders (CVAEs), the prior distribution of latent variable z follows a multivariate \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "79", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Badaro:2020:LPA, author = "Gilbert Badaro and Hazem Hajj and Nizar Habash", title = "A Link Prediction Approach for Accurately Mapping a Large-scale {Arabic} Lexical Resource to {English} {WordNet}", journal = j-TALLIP, volume = "19", number = "6", pages = "80:1--80:38", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3404854", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3404854", abstract = "Success of Natural Language Processing (NLP) models, just like all advanced machine learning models, rely heavily on large-scale lexical resources. For English, English WordNet (EWN) is a leading example of a large-scale resource that has enabled \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "80", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ma:2020:IWS, author = "Tinghuai Ma and Raeed Al-Sabri and Lejun Zhang and Bockarie Marah and Najla Al-Nabhan", title = "The Impact of Weighting Schemes and Stemming Process on Topic Modeling of {Arabic} Long and Short Texts", journal = j-TALLIP, volume = "19", number = "6", pages = "81:1--81:23", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3405843", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3405843", abstract = "In this article, first a comprehensive study of the impact of term weighting schemes on the topic modeling performance (i.e., LDA and DMM) on Arabic long and short texts is presented. We investigate six term weighting methods including Word count method \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "81", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{M:2020:CAH, author = "Poornima Devi. M. and M. 
Sornam", title = "Classification of Ancient Handwritten {Tamil} Characters on Palm Leaf Inscription Using Modified Adaptive Backpropagation Neural Network with {GLCM} Features", journal = j-TALLIP, volume = "19", number = "6", pages = "82:1--82:24", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3406209", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3406209", abstract = "The core aspiration of this proposed work is to classify Tamil characters inscribed in the palm leaf manuscript using an Artificial Neural Network. Tamil palm leaf manuscript characters in the form of images were processed and segmented using contour-. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "82", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yang:2020:AMU, author = "Qimeng Yang and Long Yu and Shengwei Tian and Jinmiao Song", title = "Attention Mechanism for {Uyghur} Personal Pronouns Resolution", journal = j-TALLIP, volume = "19", number = "6", pages = "83:1--83:13", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3412323", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3412323", abstract = "Deep neural network models for Uyghur personal pronoun resolution learn semantic information for personal pronoun and antecedents, but tend to be short-sighted---they ignore the importance of each feature. In this article, we propose a Uyghur personal \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang.
Inf. Process.", articleno = "83", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Xi:2020:GEL, author = "Xuefeng Xi and Zhou Pi and Guodong Zhou", title = "Global Encoding for Long {Chinese} Text Summarization", journal = j-TALLIP, volume = "19", number = "6", pages = "84:1--84:17", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3407911", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3407911", abstract = "Text summarization is one of the significant tasks of natural language processing, which automatically converts text into a summary. Some summarization systems, for short/long English, and short Chinese text, benefit from advances in the neural encoder-. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "84", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tanwar:2020:TMR, author = "Ashwani Tanwar and Prasenjit Majumder", title = "Translating Morphologically Rich {Indian} Languages under Zero-Resource Conditions", journal = j-TALLIP, volume = "19", number = "6", pages = "85:1--85:15", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3407912", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3407912", abstract = "This work presents an in-depth analysis of machine translations of morphologically-rich Indo-Aryan and Dravidian languages under zero-resource conditions. 
It focuses on Zero-Shot Systems for these languages and leverages transfer-learning by exploiting \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "85", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Laatar:2020:DAW, author = "Rim Laatar and Chafik Aloulou and Lamia Hadrich Belguith", title = "Disambiguating {Arabic} Words According to Their Historical Appearance in the Document Based on Recurrent Neural Networks", journal = j-TALLIP, volume = "19", number = "6", pages = "86:1--86:16", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3410569", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3410569", abstract = "How can we determine the semantic meaning of a word in relation to its context of appearance? We eventually have to grapple with this difficult question, as one of the paramount problems of Natural Language Processing (NLP). In other words, this issue \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "86", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chou:2020:CWN, author = "Chien-Lung Chou and Chia-Hui Chang and Yuan-Hao Lin and Kuo-Chun Chien", title = "On the Construction of {Web} {NER} Model Training Tool based on Distant Supervision", journal = j-TALLIP, volume = "19", number = "6", pages = "87:1--87:28", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3422817", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3422817", abstract = "Named entity recognition (NER) is an important task in natural language understanding, as it extracts the key entities (person, organization, location, date, number, etc.) and objects (product, song, movie, activity name, etc.) mentioned in texts. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "87", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wu:2020:DEW, author = "Chuhan Wu and Fangzhao Wu and Tao Qi and Junxin Liu and Yongfeng Huang and Xing Xie", title = "Detecting Entities of Works for {Chinese} Chatbot", journal = j-TALLIP, volume = "19", number = "6", pages = "88:1--88:13", month = nov, year = "2020", CODEN = "????", DOI = "https://doi.org/10.1145/3414901", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sun Mar 28 08:15:55 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3414901", abstract = "Chatbots such as Xiaoice have gained huge popularity in recent years. 
Users frequently mention their favorite works such as songs and movies in conversations with chatbots. Detecting these entities can help design better chat strategies and improve user \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "88", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Manogaran:2021:SID, author = "Gunasekaran Manogaran and Hassan Qudrat-Ullah and Qin Xin", title = "Special Issue on Deep Structured Learning for Natural Language Processing", journal = j-TALLIP, volume = "20", number = "1", pages = "1:1--1:2", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3436206", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3436206", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "1", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2021:CDB, author = "Kun Wang and Yanpeng Cui and Jianwei Hu and Yu Zhang and Wei Zhao and Luming Feng", title = "Cyberbullying Detection, Based on the {FastText} and Word Similarity Schemes", journal = j-TALLIP, volume = "20", number = "1", pages = "2:1--2:15", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3398191", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3398191", abstract = "With recent developments in online social networks (OSNs), these services are widely applied in daily lives. 
On the other hand, cyberbullying, which is a relatively new type of harassment through the internet-based electronic devices, is rising in \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "2", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sun:2021:DIM, author = "Chengai Sun and Liangyu Lv and Gang Tian and Tailu Liu", title = "Deep Interactive Memory Network for Aspect-Level Sentiment Analysis", journal = j-TALLIP, volume = "20", number = "1", pages = "3:1--3:12", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3402886", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3402886", abstract = "The goal of aspect-level sentiment analysis is to identify the sentiment polarity of a specific opinion target expressed; it is a fine-grained sentiment analysis task. Most of the existing works study how to better use the target information to model \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "3", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2021:VTM, author = "Wei Wang and Zhiguo Gong and Jing Ren and Feng Xia and Zhihan Lv and Wei Wei", title = "Venue Topic Model-enhanced Joint Graph Modelling for Citation Recommendation in Scholarly Big Data", journal = j-TALLIP, volume = "20", number = "1", pages = "4:1--4:15", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3404995", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3404995", abstract = "Natural language processing technologies, such as topic models, have been proven to be effective for scholarly recommendation tasks with the ability to deal with content information. Recently, venue recommendation is becoming an increasingly important \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "4", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Peng:2021:THS, author = "Lingxi Peng and Haohuai Liu and Yangang Nie and Ying Xie and Xuan Tang and Ping Luo", title = "The Transnational Happiness Study with Big Data Technology", journal = j-TALLIP, volume = "20", number = "1", pages = "5:1--5:12", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3412497", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3412497", abstract = "Happiness is a hot topic in academic circles. 
The study of happiness involves many disciplines, such as philosophy, psychology, sociology, and economics. However, there are few studies on the quantitative analysis of the factors affecting happiness. In \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "5", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Guangce:2021:KDN, author = "Ruan Guangce and Xia Lei", title = "Knowledge Discovery of News Text Based on Artificial Intelligence", journal = j-TALLIP, volume = "20", number = "1", pages = "6:1--6:18", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3418062", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3418062", abstract = "The explosion of news text and the development of artificial intelligence provide a new opportunity and challenge to provide high-quality media monitoring service. In this article, we propose a semantic analysis approach based on the Latent Dirichlet \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "6", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Meelen:2021:OLA, author = "Marieke Meelen and {\'E}lie Roux and Nathan Hill", title = "Optimisation of the Largest Annotated {Tibetan} Corpus Combining Rule-based, Memory-based, and Deep-learning Methods", journal = j-TALLIP, volume = "20", number = "1", pages = "7:1--7:11", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3409488", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3409488", abstract = "This article presents a pipeline that converts collections of Tibetan documents in plain text or XML into a fully segmented and POS-tagged corpus. We apply the pipeline to the large extent collection of the Buddhist Digital Resource Center. The semisupervised methods presented here not only result in a new and improved version of the largest annotated Tibetan corpus to date, the integration of rule-based, memory-based, and neural-network methods also serves as a good example of how to overcome challenges of under-researched languages. The end-to-end accuracy of our entire automatic pipeline of 91.99\% is high enough to make the resulting corpus a useful resource for both linguists and scholars of Tibetan studies.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "7", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumar:2021:MNF, author = "S. Rakesh Kumar and S. 
Muthuramalingam and Fadi Al-Turjman", title = "Multimodal News Feed Evaluation System with Deep Reinforcement Learning Approaches", journal = j-TALLIP, volume = "20", number = "1", pages = "8:1--8:12", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3414523", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3414523", abstract = "Multilingual and multimodal data analysis is the emerging news feed evaluation system. News feed analysis and evaluations are interrelated processes, which are useful in understanding the news factors. The news feed evaluation system can be implemented \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "8", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Khan:2021:HSD, author = "Muhammad Moin Khan and Khurram Shahzad and Muhammad Kamran Malik", title = "Hate Speech Detection in {Roman Urdu}", journal = j-TALLIP, volume = "20", number = "1", pages = "9:1--9:19", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3414524", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3414524", abstract = "Hate speech is a specific type of controversial content that is widely legislated as a crime that must be identified and blocked. However, due to the sheer volume and velocity of the Twitter data stream, hate speech detection cannot be performed manually. 
To address this issue, several studies have been conducted for hate speech detection in European languages, whereas little attention has been paid to low-resource South Asian languages, making the social media vulnerable for millions of users. In particular, to the best of our knowledge, no study has been conducted for hate speech detection in Roman Urdu text, which is widely used in the sub-continent. In this study, we have scraped more than 90,000 tweets and manually parsed them to identify 5,000 Roman Urdu tweets. Subsequently, we have employed an iterative approach to develop guidelines and used them for generating the Hate Speech Roman Urdu 2020 corpus. The tweets in this corpus are classified at three levels: Neutral--Hostile, Simple--Complex, and Offensive--Hate speech. As another contribution, we have used five supervised learning techniques, including a deep learning technique, to evaluate and compare their effectiveness for hate speech detection. The results show that Logistic Regression outperformed all other techniques, including deep learning techniques for the two levels of classification, by achieving an F1 score of 0.906 for distinguishing between Neutral--Hostile tweets, and 0.756 for distinguishing between Offensive--Hate speech tweets.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "9", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sun:2021:UNM, author = "Haipeng Sun and Rui Wang and Masao Utiyama and Benjamin Marie and Kehai Chen and Eiichiro Sumita and Tiejun Zhao", title = "Unsupervised Neural Machine Translation for Similar and Distant Language Pairs: an Empirical Study", journal = j-TALLIP, volume = "20", number = "1", pages = "10:1--10:17", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3418059", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3418059", abstract = "Unsupervised neural machine translation (UNMT) has achieved remarkable results for several language pairs, such as French-English and German-English. Most previous studies have focused on modeling UNMT systems; few studies have investigated the effect \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "10", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2021:HBN, author = "Peiying Zhang and Xingzhe Huang and Maozhen Li and Yu Xue", title = "Hybridization between Neural Computing and Nature-Inspired Algorithms for a Sentence Similarity Model Based on the Attention Mechanism", journal = j-TALLIP, volume = "20", number = "1", pages = "11:1--11:21", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447756", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3447756", abstract = "Sentence similarity analysis has been applied in many fields, such as machine translation, the question answering system, and voice customer service. As a basic task of natural language processing, sentence similarity analysis plays an important role in \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "11", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Husain:2021:SOL, author = "Fatemah Husain and Ozlem Uzuner", title = "A Survey of Offensive Language Detection for the {Arabic} Language", journal = j-TALLIP, volume = "20", number = "1", pages = "12:1--12:44", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3421504", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3421504", abstract = "The use of offensive language in user-generated content is a serious problem that needs to be addressed with the latest technology. The field of Natural Language Processing (NLP) can support the automatic detection of offensive language. In this survey, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "12", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Alzubaidi:2021:RTA, author = "Mohammad A. Alzubaidi and Mwaffaq Otoom and Nouran S. Ahmad", title = "Real-time Assistive Reader Pen for {Arabic} Language", journal = j-TALLIP, volume = "20", number = "1", pages = "13:1--13:30", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3423133", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3423133", abstract = "Disability is an impairment affecting an individual's livelihood and independence. 
Assistive technology enables the disabled cohort of the community to break the barriers to learning, access information, contribute to the community, and live \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "13", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sidig:2021:KAS, author = "Ala Addin I. Sidig and Hamzah Luqman and Sabri Mahmoud and Mohamed Mohandes", title = "{KArSL}: {Arabic} Sign Language Database", journal = j-TALLIP, volume = "20", number = "1", pages = "14:1--14:19", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3423420", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3423420", abstract = "Sign language is the major means of communication for the deaf community. It uses body language and gestures such as hand shapes, lip patterns, and facial expressions to convey a message. Sign language is geography-specific, as it differs from one \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "14", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wijayanti:2021:AIS, author = "Rini Wijayanti and Andria Arisal", title = "Automatic {Indonesian} Sentiment Lexicon Curation with Sentiment Valence Tuning for Social Media Sentiment Analysis", journal = j-TALLIP, volume = "20", number = "1", pages = "15:1--15:16", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3425632", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3425632", abstract = "A novel Indonesian sentiment lexicon (SentIL --- Sentiment Indonesian Lexicon) is created with an automatic pipeline; from creating sentiment seed words, adding new words with slang words, emoticons, and from the given dictionary and sentiment corpus, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "15", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2021:TTS, author = "Zhongyang Li and Xiao Ding and Ting Liu", title = "{TransBERT}: a Three-Stage Pre-training Technology for Story-Ending Prediction", journal = j-TALLIP, volume = "20", number = "1", pages = "16:1--16:20", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3427669", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3427669", abstract = "Recent advances, such as GPT, BERT, and RoBERTa, have shown success in incorporating a pre-trained transformer language model and fine-tuning operations to improve downstream NLP systems. However, this framework still has some fundamental problems in \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "16", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bolucu:2021:CUM, author = "Necva B{\"o}l{\"u}c{\"u} and Burcu Can", title = "A Cascaded Unsupervised Model for {PoS} Tagging", journal = j-TALLIP, volume = "20", number = "1", pages = "17:1--17:23", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447759", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3447759", abstract = "Part of speech (PoS) tagging is one of the fundamental syntactic tasks in Natural Language Processing, as it assigns a syntactic category to each word within a given sentence or context (such as noun, verb, adjective, etc.). Those syntactic categories \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "17", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chauhan:2021:ISC, author = "Uttam Chauhan and Apurva Shah", title = "Improving Semantic Coherence of {Gujarati} Text Topic Model Using Inflectional Forms Reduction and Single-letter Words Removal", journal = j-TALLIP, volume = "20", number = "1", pages = "18:1--18:18", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447760", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Apr 15 14:24:01 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3447760", abstract = "A topic model is one of the best stochastic models for summarizing an extensive collection of text. 
It has accomplished an inordinate achievement in text analysis as well as text summarization. It can be employed to the set of documents that are \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "18", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Congjun:2021:RDT, author = "Congjun Long and Nathan W. Hill", title = "Recent Developments in {Tibetan NLP}", journal = j-TALLIP, volume = "20", number = "2", pages = "19:1--19:3", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3453692", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3453692", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "19", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{List:2021:TSH, author = "Johann-Mattis List and Nathaniel A. Sims and Robert Forkel", title = "Toward a Sustainable Handling of Interlinear-Glossed Text in Language Documentation", journal = j-TALLIP, volume = "20", number = "2", pages = "20:1--20:15", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3389010", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3389010", abstract = "While the amount of digitally available data on the worlds' languages is steadily increasing, with more and more languages being documented, only a small proportion of the language resources produced are sustainable.
Data reuse is often difficult due to \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "20", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Krishna:2021:ATA, author = "Ravi Krishna and Norman Mu and Kurt Keutzer", title = "Applying Text Analytics to the Mind-section Literature of the {Tibetan} Tradition of the {Great Perfection}", journal = j-TALLIP, volume = "20", number = "2", pages = "21:1--21:32", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3392047", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3392047", abstract = "Over the past decade, through a mixture of optical character recognition and manual input, there is now a growing corpus of Tibetan literature available as e-texts in Unicode format. With the creation of such a corpus, the techniques of text analytics \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "21", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Trinley:2021:TWE, author = "Ngawang Trinley and Tenzin and Dirk Schmidt and Helios Hildt and Tenzin Kaldan", title = "Taming the Wild Etext: Managing, Annotating, and Sharing {Tibetan} Corpora in Open Spaces", journal = j-TALLIP, volume = "20", number = "2", pages = "22:1--22:23", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3418060", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3418060", abstract = "Digital text is quickly becoming essential to modern daily life. The article you are reading right now is born digital; unlike texts of the not-so-distant past, it may never be printed at all. Worldwide, the trend is clear: Digital text is on the way in, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "22", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kulkarni:2021:SPF, author = "Amba Kulkarni", title = "{Sanskrit} Parsing Following {Indian} Theories of Verbal Cognition", journal = j-TALLIP, volume = "20", number = "2", pages = "23:1--23:38", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3418061", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3418061", abstract = "P{\=a}{\d{n}}ini's grammar is an important milestone in the Indian grammatical tradition. 
Unlike grammars of other languages, it is almost exhaustive and together with the theories of {\'s}abdabodha (verbal cognition), this grammar provides a system for language \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "23", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2021:FBS, author = "Yachao Li and Jing Jiang and Jia Yangji and Ning Ma", title = "Finding Better Subwords for {Tibetan} Neural Machine Translation", journal = j-TALLIP, volume = "20", number = "2", pages = "24:1--24:11", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3448216", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3448216", abstract = "Subword segmentation plays an important role in Tibetan neural machine translation (NMT). The structure of Tibetan words consists of two levels. First, words consist of a sequence of syllables, and then a syllable consists of a sequence of characters. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "24", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Long:2021:RTM, author = "Congjun Long and Xuewen Zhou and Maoke Zhou", title = "Recognition of {Tibetan} Maximal-length Noun Phrases Based on Syntax Tree", journal = j-TALLIP, volume = "20", number = "2", pages = "25:1--25:13", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3423324", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3423324", abstract = "Frequently corresponding to syntactic components, the Maximal-length Noun Phrase (MNP) possesses abundant syntactic and semantic information and acts a certain semantic role in sentences. Recognition of MNP plays an important role in Natural Language \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "25", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Shi:2021:MLC, author = "Shumin Shi and Dan Luo and Xing Wu and Congjun Long and Heyan Huang", title = "Multi-level Chunk-based Constituent-to-Dependency {Treebank} Transformation for {Tibetan} Dependency Parsing", journal = j-TALLIP, volume = "20", number = "2", pages = "26:1--26:12", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3424247", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3424247", abstract = "Dependency parsing is an important task for Natural Language Processing (NLP). 
However, a mature parser requires a large treebank for training, which is still extremely costly to create. Tibetan is a kind of extremely low-resource language for NLP, there \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "26", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sun:2021:JMR, author = "Yuan Sun and Andong Chen and Chaofan Chen and Tianci Xia and Xiaobing Zhao", title = "A Joint Model for Representation Learning of {Tibetan} Knowledge Graph Based on Encyclopedia", journal = j-TALLIP, volume = "20", number = "2", pages = "27:1--27:17", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447248", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3447248", abstract = "Learning the representation of a knowledge graph is critical to the field of natural language processing. There is a lot of research for English knowledge graph representation. However, for the low-resource languages, such as Tibetan, how to represent \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "27", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2021:CSE, author = "Hao Wang and Bin Wang and Jianyong Duan and Jiajun Zhang", title = "{Chinese} Spelling Error Detection Using a Fusion Lattice {LSTM}", journal = j-TALLIP, volume = "20", number = "2", pages = "28:1--28:11", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3426882", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3426882", abstract = "Spelling error detection serves as a crucial preprocessing in many natural language processing applications. Unlike English, where every single word is directly typed by keyboard, we have to use an input method to input Chinese characters. The pinyin \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "28", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nasution:2021:POB, author = "Arbi Haza Nasution and Yohei Murakami and Toru Ishida", title = "Plan Optimization to Bilingual Dictionary Induction for Low-resource Language Families", journal = j-TALLIP, volume = "20", number = "2", pages = "29:1--29:28", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3448215", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3448215", abstract = "Creating bilingual dictionary is the first crucial step in enriching low-resource languages. 
Especially for the closely related ones, it has been shown that the constraint-based approach is useful for inducing bilingual lexicons from two bilingual \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "29", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{An:2021:NDP, author = "Bo An and Congjun Long", title = "Neural Dependency Parser for {Tibetan} Sentences", journal = j-TALLIP, volume = "20", number = "2", pages = "30:1--30:16", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3429456", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3429456", abstract = "The research of Tibetan dependency analysis is mainly limited to two challenges: lack of a dataset and reliance on expert knowledge. To resolve the preceding challenges, we first introduce a new Tibetan dependency analysis dataset, and then propose a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "30", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2021:USC, author = "Longtu Zhang and Mamoru Komachi", title = "Using Sub-character Level Information for Neural Machine Translation of Logographic Languages", journal = j-TALLIP, volume = "20", number = "2", pages = "31:1--31:15", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3431727", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3431727", abstract = "Logographic and alphabetic languages (e.g., Chinese vs. English) have different writing systems linguistically. Languages belonging to the same writing system usually exhibit more sharing information, which can be used to facilitate natural language \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "31", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mishra:2021:HIC, author = "Santosh Kumar Mishra and Rijul Dhir and Sriparna Saha and Pushpak Bhattacharyya", title = "A {Hindi} Image Caption Generation Framework Using Deep Learning", journal = j-TALLIP, volume = "20", number = "2", pages = "32:1--32:19", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3432246", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3432246", abstract = "Image captioning is the process of generating a textual description of an image that aims to describe the salient parts of the given image. 
It is an important problem, as it involves computer vision and natural language processing, where computer vision \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "32", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Darwish:2021:ADR, author = "Kareem Darwish and Ahmed Abdelali and Hamdy Mubarak and Mohamed Eldesouki", title = "{Arabic} Diacritic Recovery Using a Feature-rich {biLSTM} Model", journal = j-TALLIP, volume = "20", number = "2", pages = "33:1--33:18", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3434235", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3434235", abstract = "Diacritics (short vowels) are typically omitted when writing Arabic text, and readers have to reintroduce them to correctly pronounce words. There are two types of Arabic diacritics: The first are core-word diacritics (CW), which specify the lexical \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "33", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Binbeshr:2021:SRH, author = "Farid Binbeshr and Amirrudin Kamsin and Manal Mohammed", title = "A Systematic Review on Hadith Authentication and Classification Methods", journal = j-TALLIP, volume = "20", number = "2", pages = "34:1--34:17", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3434236", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3434236", abstract = "Background: A hadith refers to sayings, actions, and characteristics of the Prophet Muhammad peace be upon him. The authenticity of hadiths is crucial, because they constitute the source of legislation for Muslims with the Holy Quran. Classifying hadiths \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "34", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2021:HMN, author = "Yu Wang and Yining Sun and Zuchang Ma and Lisheng Gao and Yang Xu", title = "A Hybrid Model for Named Entity Recognition on {Chinese} Electronic Medical Records", journal = j-TALLIP, volume = "20", number = "2", pages = "35:1--35:12", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3436819", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3436819", abstract = "Electronic medical records (EMRs) contain valuable information about the patients, such as clinical symptoms, diagnostic results, and medications. Named entity recognition (NER) aims to recognize entities from unstructured text, which is the initial step \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "35", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jin:2021:HSS, author = "Guozhe Jin and Zhezhou Yu", title = "A Hierarchical Sequence-to-Sequence Model for {Korean} {POS} Tagging", journal = j-TALLIP, volume = "20", number = "2", pages = "36:1--36:13", month = apr, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3421762", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 6 07:32:43 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3421762", abstract = "Part-of-speech (POS) tagging is a fundamental task in natural language processing. Korean POS tagging consists of two subtasks: morphological analysis and POS tagging. 
In recent years, scholars have tended to use the seq2seq model to solve this problem. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "36", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{S:2021:SAA, author = "Dhivya S. and Usha Devi G.", title = "Study on Automated Approach to Recognize Characters for Handwritten and Historical Document", journal = j-TALLIP, volume = "20", number = "3", pages = "37:1--37:24", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3396167", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3396167", abstract = "Script recognition is the mechanism of automatic script analysis and recognition whereby intensive study has been carried out and a significant amount of papers on this problem have been released over the past. But there are still a few issues to be \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "37", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yang:2021:DDH, author = "Xiaodong Yang and Xiaoxia Lin", title = "Design and Development of Heuristic Utility Management Algorithm for {Chinese} Library Management System", journal = j-TALLIP, volume = "20", number = "3", pages = "38:1--38:13", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3397968", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3397968", abstract = "Utility Management in a library is the programmatic tool with the synthetic mental program ability, along with Artificial Intelligence capacities, headed to manage a high volume of books, articles, and assignments, which help to ease the manual \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "38", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{S:2021:THT, author = "Dhivya S. and Usha Devi G.", title = "{TAMIZHI}: Historical {Tamil-Brahmi} Script Recognition Using {CNN} and {MobileNet}", journal = j-TALLIP, volume = "20", number = "3", pages = "39:1--39:26", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3402891", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3402891", abstract = "Computational epigraphy is the study of an ancient script where the computer science and mathematical model is relatively built for epigraphy.
The Tamil-Brahmi inscriptions are the most ancient of the extant written of the Tamil. The inscriptions furnish \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "39", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jiang:2021:TLB, author = "Peipei Jiang and Liailun Chen and Min-Feng Wang", title = "Transfer Learning Based Recurrent Neural Network Algorithm for Linguistic Analysis", journal = j-TALLIP, volume = "20", number = "3", pages = "40:1--40:16", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3406204", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3406204", abstract = "Each language is a system of understanding and skills that allows language users to interact, express thoughts, hypotheses, feelings, wishes, and all that needs to be expressed. Linguistics is the research of these structures in all respects: the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "40", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Fang:2021:HBG, author = "Hui Fang and Hongmei Shi and Jiuzhou Zhang", title = "Heuristic Bilingual Graph Corpus Network to Improve {English} Instruction Methodology Based on Statistical Translation Approach", journal = j-TALLIP, volume = "20", number = "3", pages = "41:1--41:14", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3406205", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3406205", abstract = "The number of sentence pairs in the bilingual corpus is a key to translation accuracy in computational machine translations. However, if the amount goes beyond a certain degree, the increasing number of cases has less impact on the translation while the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "41", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jamal:2021:DLB, author = "Nasir Jamal and Chen Xianqiao and Fadi Al-Turjman and Farhan Ullah", title = "A Deep Learning-based Approach for Emotions Classification in Big Corpus of Imbalanced Tweets", journal = j-TALLIP, volume = "20", number = "3", pages = "42:1--42:16", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3410570", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3410570", abstract = "Emotions detection in natural languages is very effective in analyzing the user's mood about a concerned product, news, topic, and so on. However, it is really a challenging task to extract important features from a burst of raw social text, as emotions \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "42", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Muthu:2021:FET, author = "Balaanand Muthu and Sivaparthipan Cb and Priyan Malarvizhi Kumar and Seifedine Nimer Kadry and Ching-Hsien Hsu and Oscar Sanjuan and Ruben Gonzalez Crespo", title = "A Framework for Extractive Text Summarization Based on Deep Learning Modified Neural Network Classifier", journal = j-TALLIP, volume = "20", number = "3", pages = "45:1--45:20", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3392048", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3392048", abstract = "There is an exponential growth of text data over the internet, and it is expected to gain significant growth and attention in the coming years. Extracting meaningful insights from text data is crucially important as it offers value-added solutions to \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "45", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2021:AEA, author = "Ailing Wang and Jie Sun and Leiming Li", title = "An Analysis for Elements of Affecting the Establishment and Promotion of Micro-business Trust in {C2C} Model under {WeChat} Circumstance", journal = j-TALLIP, volume = "20", number = "3", pages = "46:1--46:11", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3398011", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3398011", abstract = "The core of micro-business and consumer transactions is trust. Based on the Theory of Reasoned Action and Technology Acceptance Model, this article discusses the factors of the establishment and promotion of micro-business trust from the trust orientation \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "46", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2021:SGD, author = "Erlu Wang and Priyan Malarvizhi Kumar and R. 
Dinesh Jackson Samuel", title = "Semantic Graphical Dependence Parsing Model in Improving {English} Teaching Abilities", journal = j-TALLIP, volume = "20", number = "3", pages = "48:1--48:14", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3425633", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3425633", abstract = "It is a very difficult problem to achieve high-order functionality for graphical dependency parsing without growing decoding difficulties. To solve this problem, this article offers a way for Semantic Graphical Dependence Parsing Model (SGDPM) with a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "48", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{P:2021:TST, author = "Ashokkumar P. and Siva Shankar G. and Gautam Srivastava and Praveen Kumar Reddy Maddikunta and Thippa Reddy Gadekallu", title = "A Two-stage Text Feature Selection Algorithm for Improving Text Classification", journal = j-TALLIP, volume = "20", number = "3", pages = "49:1--49:19", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3425781", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3425781", abstract = "As the number of digital text documents increases on a daily basis, the classification of text is becoming a challenging task. Each text document consists of a large number of words (or features) that drive down the efficiency of a classification \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Asian Low-Resour. Lang. Inf. Process.", articleno = "49", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Quamer:2021:SSA, author = "Waris Quamer and Praphula Kumar Jain and Arpit Rai and Vijayalakshmi Saravanan and Rajendra Pamula and Chiranjeev Kumar", title = "{SACNN}: Self-attentive Convolutional Neural Network Model for Natural Language Inference", journal = j-TALLIP, volume = "20", number = "3", pages = "50:1--50:16", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3426884", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3426884", abstract = "Inference has been central problem for understanding and reasoning in artificial intelligence. Especially, Natural Language Inference is an interesting problem that has attracted the attention of many researchers. Natural language inference intends to \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "50", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liao:2021:ENO, author = "Hsiu-Li Liao and Zhen-Yu Huang and Su-Houn Liu", title = "The Effects of Negative Online Reviews on Consumer Perception, Attitude and Purchase Intention: Experimental Investigation of the Amount, Quality, and Presentation Order of {eWOM}", journal = j-TALLIP, volume = "20", number = "3", pages = "51:1--51:21", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3426883", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3426883", abstract = "The quick growth and fast spread of electronic word-of-mouth (eWOM) have created a new threat to Internet merchants and marketers through paid online reviewers flooding sites with product and service reviews that could confuse and deter customers. This \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "51", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Seifollahi:2021:EBT, author = "Sattar Seifollahi and Massimo Piccardi and Alireza Jolfaei", title = "An Embedding-Based Topic Model for Document Classification", journal = j-TALLIP, volume = "20", number = "3", pages = "52:1--52:13", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3431728", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3431728", abstract = "Topic modeling is an unsupervised learning task that discovers the hidden topics in a collection of documents. In turn, the discovered topics can be used for summarizing, organizing, and understanding the documents in the collection. Most of the existing \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "52", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2021:DSL, author = "Yong Li and Xiaojun Yang and Min Zuo and Qingyu Jin and Haisheng Li and Qian Cao", title = "Deep Structured Learning for Natural Language Processing", journal = j-TALLIP, volume = "20", number = "3", pages = "53:1--53:14", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3433538", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:09 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3433538", abstract = "The real-time and dissemination characteristics of network information make net-mediated public opinion become more and more important food safety early warning resources, but the data of petabyte (PB) scale growth also bring great difficulties to the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "53", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mao:2021:NJM, author = "Cunli Mao and Zhibo Man and Zhengtao Yu and Shengxiang Gao and Zhenhan Wang and Hongbin Wang", title = "A Neural Joint Model with {BERT} for {Burmese} Syllable Segmentation, Word Segmentation, and {POS} Tagging", journal = j-TALLIP, volume = "20", number = "4", pages = "54:1--54:23", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3436818", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3436818", abstract = "The smallest semantic unit of the Burmese language is called the syllable. 
In the present study, it is intended to propose the first neural joint learning model for Burmese syllable segmentation, word segmentation, and part-of-speech (POS) tagging with \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "54", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{E:2021:AMP, author = "Manjunath K. E. and Srinivasa Raghavan K. M. and K. Sreenivasa Rao and Dinesh Babu Jayagopi and V. Ramasubramanian", title = "Approaches for Multilingual Phone Recognition in Code-switched and Non-code-switched Scenarios Using {Indian} Languages", journal = j-TALLIP, volume = "20", number = "4", pages = "55:1--55:19", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3437256", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3437256", abstract = "In this study, we evaluate and compare two different approaches for multilingual phone recognition in code-switched and non-code-switched scenarios. First approach is a front-end Language Identification (LID)-switched to a monolingual phone recognizer \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "55", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumar:2021:NAM, author = "Mohinder Kumar and Manish Kumar Jindal and Munish Kumar", title = "A Novel Attack on Monochrome and Greyscale {Devanagari} {CAPTCHAs}", journal = j-TALLIP, volume = "20", number = "4", pages = "56:1--56:30", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3439798", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3439798", abstract = "The use of computer programs in breaching web site security is common today. CAPTCHA (Completely Automated Public Turing test to tell Computers and Humans Apart) and human interaction proofs are the cost-effective solution to these kinds of computer \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "56", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lin:2021:FIG, author = "Nankai Lin and Boyu Chen and Xiaotian Lin and Kanoksak Wattanachote and Shengyi Jiang", title = "A Framework for {Indonesian} Grammar Error Correction", journal = j-TALLIP, volume = "20", number = "4", pages = "57:1--57:12", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3440993", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3440993", abstract = "Grammatical Error Correction (GEC) is a challenge in Natural Language Processing research. 
Although many researchers have been focusing on GEC in universal languages such as English or Chinese, few studies focus on Indonesian, which is a low-resource \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "57", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Shivachi:2021:LSU, author = "Casper Shikali Shivachi and Refuoe Mokhosi and Zhou Shijie and Liu Qihe", title = "Learning Syllables Using {Conv-LSTM} Model for {Swahili} Word Representation and Part-of-speech Tagging", journal = j-TALLIP, volume = "20", number = "4", pages = "58:1--58:25", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3445975", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3445975", abstract = "The need to capture intra-word information in natural language processing (NLP) tasks has inspired research in learning various word representations at word, character, or morpheme levels, but little attention has been given to syllables from a syllabic \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "58", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ranathunga:2021:SAS, author = "Surangika Ranathunga and Isuru Udara Liyanage", title = "Sentiment Analysis of {Sinhala} News Comments", journal = j-TALLIP, volume = "20", number = "4", pages = "59:1--59:23", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3445035", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3445035", abstract = "Sinhala is a low-resource language, for which basic language and linguistic tools have not been properly defined. This affects the development of NLP-based end-user applications for Sinhala. Thus, when implementing NLP tools such as sentiment analyzers, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "59", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhu:2021:GBM, author = "Junnan Zhu and Lu Xiang and Yu Zhou and Jiajun Zhang and Chengqing Zong", title = "Graph-based Multimodal Ranking Models for Multimodal Summarization", journal = j-TALLIP, volume = "20", number = "4", pages = "60:1--60:21", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3445794", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3445794", abstract = "Multimodal summarization aims to extract the most important information from the multimedia input. 
It is becoming increasingly popular due to the rapid growth of multimedia data in recent years. There are various researches focusing on different \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "60", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lalrempuii:2021:IEM, author = "Candy Lalrempuii and Badal Soni and Partha Pakray", title = "An Improved {English-to-Mizo} Neural Machine Translation", journal = j-TALLIP, volume = "20", number = "4", pages = "61:1--61:21", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3445974", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3445974", abstract = "Machine Translation is an effort to bridge language barriers and misinterpretations, making communication more convenient through the automatic translation of languages. The quality of translations produced by corpus-based approaches predominantly depends \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "61", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Thin:2021:TNL, author = "Dang Van Thin and Ngan Luu-Thuy Nguyen and Tri Minh Truong and Lac Si Le and Duy Tin Vo", title = "Two New Large Corpora for {Vietnamese} Aspect-based Sentiment Analysis at Sentence Level", journal = j-TALLIP, volume = "20", number = "4", pages = "62:1--62:22", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446678", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3446678", abstract = "Aspect-based sentiment analysis has been studied in both research and industrial communities over recent years. For the low-resource languages, the standard benchmark corpora play an important role in the development of methods. In this article, we \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "62", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Alian:2021:BAP, author = "Marwah Alian and Arafat Awajan and Ahmad Al-Hasan and Raeda Akuzhia", title = "Building {Arabic} Paraphrasing Benchmark based on Transformation Rules", journal = j-TALLIP, volume = "20", number = "4", pages = "63:1--63:17", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3446770", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3446770", abstract = "Measuring semantic similarity between short texts is an important task in many applications of natural language processing, such as paraphrasing identification. This process requires a benchmark of sentence pairs that are labeled by Arab linguists and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "63", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Prabhakar:2021:QET, author = "Dinesh Kumar Prabhakar and Sukomal Pal and Chiranjeev Kumar", title = "Query Expansion for Transliterated Text Retrieval", journal = j-TALLIP, volume = "20", number = "4", pages = "64:1--64:34", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3447649", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3447649", abstract = "With Web 2.0, there has been exponential growth in the number of Web users and the volume of Web content. 
Most of these users are not only consumers of the information but also generators of it. People express themselves here in colloquial languages, but \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "64", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Taghizadeh:2021:CLA, author = "Nasrin Taghizadeh and Heshaam Faili", title = "Cross-lingual Adaptation Using Universal Dependencies", journal = j-TALLIP, volume = "20", number = "4", pages = "65:1--65:23", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3448251", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3448251", abstract = "We describe a cross-lingual adaptation method based on syntactic parse trees obtained from the Universal Dependencies (UD), which are consistent across languages, to develop classifiers in low-resource languages. The idea of UD parsing is to capture \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "65", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Coban:2021:FTM, author = "{\"O}nder {\c{C}}oban and Ali Inan and Selma Ayse {\"O}zel", title = "{Facebook} Tells Me Your Gender: an Exploratory Study of Gender Prediction for {Turkish} {Facebook} Users", journal = j-TALLIP, volume = "20", number = "4", pages = "66:1--66:38", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3448253", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3448253", abstract = "Online Social Networks (OSNs) are very popular platforms for social interaction. Data posted publicly over OSNs pose various threats against the individual privacy of OSN users. Adversaries can try to predict private attribute values, such as gender, as \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "66", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Qi:2021:DPB, author = "Shanshan Qi and Limin Zheng and Feiyu Shang", title = "Dependency Parsing-based Entity Relation Extraction over {Chinese} Complex Text", journal = j-TALLIP, volume = "20", number = "4", pages = "67:1--67:34", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450273", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3450273", abstract = "Open Relation Extraction (ORE) plays a significant role in the field of Information Extraction. 
It breaks the limitation that traditional relation extraction must pre-define relational types in the annotated corpus and specific domains restrictions, to \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "67", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mousavi:2021:DPW, author = "Zahra Mousavi and Heshaam Faili", title = "Developing the {Persian} {Wordnet} of Verbs Using Supervised Learning", journal = j-TALLIP, volume = "20", number = "4", pages = "68:1--68:18", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450969", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3450969", abstract = "Nowadays, wordnets are extensively used as a major resource in natural language processing and information retrieval tasks. Therefore, the accuracy of wordnets has a direct influence on the performance of the involved applications. This paper presents a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "68", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Arora:2021:SSR, author = "Karunesh Kumar Arora and Shyam Sunder Agrawal", title = "Source-side Reordering to Improve Machine Translation between Languages with Distinct Word Orders", journal = j-TALLIP, volume = "20", number = "4", pages = "69:1--69:18", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3448252", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3448252", abstract = "English and Hindi have significantly different word orders. English follows the subject-verb-object (SVO) order, while Hindi primarily follows the subject-object-verb (SOV) order. This difference poses challenges to modeling this pair of languages for \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "69", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumari:2021:RNS, author = "Divya Kumari and Asif Ekbal and Rejwanul Haque and Pushpak Bhattacharyya and Andy Way", title = "Reinforced {NMT} for Sentiment and Content Preservation in Low-resource Scenario", journal = j-TALLIP, volume = "20", number = "4", pages = "70:1--70:27", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450970", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3450970", abstract = "The preservation of domain knowledge from source to the target is crucial in any translation workflows. Hence, translation service providers that use machine translation (MT) in production could reasonably expect that the translation process should \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "70", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sathish:2021:ISA, author = "R. Sathish and P. 
Ezhumalai", title = "Intermodal Sentiment Analysis for Images with Text Captions Using the {VGGNET} Technique", journal = j-TALLIP, volume = "20", number = "4", pages = "71:1--71:14", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450971", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 14 07:03:10 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3450971", abstract = "More individuals actively express their opinions and attitudes in social media through advanced improvements such as visual content and text captions. Sentiment analysis for visuals such as images, video, and GIFs has become an emerging research trend in \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "71", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Manogaran:2021:ISI, author = "Gunasekaran Manogaran and Hassan Qudrat-Ullah and Qin Xin", title = "Introduction to the Special Issue on Deep Structured Learning for Natural Language Processing, {Part 3}", journal = j-TALLIP, volume = "20", number = "5", pages = "72e:1--72e:3", month = sep, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3476464", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3476464", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "72e", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tan:2021:RPT, author = "Junyang Tan and Dan Xia and Shiyun Dong and Honghao Zhu and Binshi Xu", title = "Research On Pre-Training Method and Generalization Ability of Big Data Recognition Model of the {Internet of Things}", journal = j-TALLIP, volume = "20", number = "5", pages = "72:1--72:15", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3433539", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3433539", abstract = "The Internet of Things and big data are currently hot concepts and research fields. The mining, classification, and recognition of big data in the Internet of Things system are the key links that are widely of concern at present. The artificial neural \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "72", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2021:SAQ, author = "Yarong Li", title = "Sequence Alignment with {Q}-Learning Based on the Actor--Critic Model", journal = j-TALLIP, volume = "20", number = "5", pages = "73:1--73:7", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3433540", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3433540", abstract = "Multiple sequence alignment methods refer to a series of algorithmic solutions for the alignment of evolutionary-related sequences while taking into account evolutionary events such as mutations, insertions, deletions, and rearrangements under certain \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "73", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Naseem:2021:CSW, author = "Usman Naseem and Imran Razzak and Shah Khalid Khan and Mukesh Prasad", title = "A Comprehensive Survey on Word Representation Models: From Classical to State-of-the-Art Word Representation Language Models", journal = j-TALLIP, volume = "20", number = "5", pages = "74:1--74:35", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3434237", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3434237", abstract = "Word representation has always been an important research area in the history of natural language processing (NLP). 
Understanding such complex text data is imperative, given that it is rich in information and can be used widely across various \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "74", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ji:2021:OAP, author = "Xiaowen Ji and Jincheng Ni", title = "An {OT-ET} Analysis of {Polish} Singular--Plural Pairs", journal = j-TALLIP, volume = "20", number = "5", pages = "75:1--75:12", month = may, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3434238", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3434238", abstract = "Optimality Theory (OT) and Exemplar Theory (ET) are two enchanting theories to many scholars, but each still faces criticism and remaining persistent problems. Application of both theories to areas in linguistics where conflicts may arise has been \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "75", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jing:2021:GGM, author = "Weipeng Jing and Xianyang Song and Donglin Di and Houbing Song", title = "{geoGAT}: Graph Model Based on Attention Mechanism for Geographic Text Classification", journal = j-TALLIP, volume = "20", number = "5", pages = "76:1--76:18", month = sep, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3434239", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3434239", abstract = "In the area of geographic information processing, there are few researches on geographic text classification. However, the application of this task in Chinese is relatively rare. In our work, we intend to implement a method to extract text containing \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "76", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bi:2021:BDL, author = "Mingwen Bi and Qingchuan Zhang and Min Zuo and Zelong Xu and Qingyu Jin", title = "Bi-directional Long Short-Term Memory Model with Semantic Positional Attention for the Question Answering System", journal = j-TALLIP, volume = "20", number = "5", pages = "77:1--77:13", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3439800", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3439800", abstract = "The intelligent question answering system aims to provide quick and concise feedback on the questions of users. Although the performance of phrase-level and numerous attention models have been improved, the sentence components and position information are \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "77", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Fan:2021:DNN, author = "Xiaoqian Fan and Bowen Yang and Wenzhi Chen and Quanfang Fan", title = "Deep Neural Network Based Noised {Asian} Speech Enhancement and Its Implementation on a Hearing Aid App", journal = j-TALLIP, volume = "20", number = "5", pages = "78:1--78:14", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3439797", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3439797", abstract = "This article studies noised Asian speech enhancement based on the deep neural network (DNN) and its implementation on an app. We use the THCHS-30 speech dataset and the common noise dataset in daily life as training and testing data of the DNN. To stack \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "78", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhao:2021:MOH, author = "Chunhe Zhao and Balaanand Muthu and P. 
Mohamed Shakeel", title = "Multi-Objective Heuristic Decision Making and Benchmarking for Mobile Applications in {English} Language Learning", journal = j-TALLIP, volume = "20", number = "5", pages = "79:1--79:16", month = sep, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3439799", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3439799", abstract = "This research proposes to evaluate and analyze the decision matrix for learner's English mobile applications (EMAs) based on multi-objective heuristic decision making with a view to listening, speaking, reading, and writing. Because of the number of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "79", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Gupta:2021:TIC, author = "Vedika Gupta and Nikita Jain and Shubham Shubham and Agam Madan and Ankit Chaudhary and Qin Xin", title = "Toward Integrated {CNN}-based Sentiment Analysis of Tweets for Scarce-resource Language --- {Hindi}", journal = j-TALLIP, volume = "20", number = "5", pages = "80:1--80:23", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3450447", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3450447", abstract = "Linguistic resources for commonly used languages such as English and Mandarin Chinese are available in abundance, hence the existing research in these languages. However, there are languages for which linguistic resources are scarcely available. 
One of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "80", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Do:2021:DVT, author = "Phuc Do and Truong H. V. Phan and Brij B. Gupta", title = "Developing a {Vietnamese} Tourism Question Answering System Using Knowledge Graph and Deep Learning", journal = j-TALLIP, volume = "20", number = "5", pages = "81:1--81:18", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3453651", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3453651", abstract = "In recent years, Question Answering (QA) systems have increasingly become very popular in many sectors. This study aims to use a knowledge graph and deep learning to develop a QA system for tourism in Vietnam. First, the QA system replies to a user's \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "81", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2021:REU, author = "Meng Li", title = "Research on Extraction of Useful Tourism Online Reviews Based on Multimodal Feature Fusion", journal = j-TALLIP, volume = "20", number = "5", pages = "82:1--82:16", month = sep, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3453694", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3453694", abstract = "To effectively identify the influencing factors of the perceived usefulness of multimodal data in online reviews of tourism products, this article explores the optimization method of online tourism products based on user-generated content and conducts \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "82", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sun:2021:TCA, author = "Lin Sun and Wenzheng Xu and Jimin Liu", title = "Two-channel Attention Mechanism Fusion Model of Stock Price Prediction Based on {CNN-LSTM}", journal = j-TALLIP, volume = "20", number = "5", pages = "83:1--83:12", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3453693", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3453693", abstract = "Using hierarchical CNN, the company's multiple news is characterized as three levels: sentence vectors, chapter vectors, and enterprise sentiment vectors. 
By combining the stock price data with the news lyric data at the same time, the influence of news \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "83", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jain:2021:HCL, author = "Praphula Kumar Jain and Vijayalakshmi Saravanan and Rajendra Pamula", title = "A Hybrid {CNN-LSTM}: a Deep Learning Approach for Consumer Sentiment Analysis Using Qualitative User-Generated Contents", journal = j-TALLIP, volume = "20", number = "5", pages = "84:1--84:15", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3457206", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3457206", abstract = "With the fastest growth of information and communication technology (ICT), the availability of web content on social media platforms is increasing day by day. Sentiment analysis from online reviews drawing researchers' attention from various organizations \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "84", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Deng:2021:CCB, author = "Fei Deng and Timothy V. 
Rasinski", title = "A Computer Corpus-Based Study of {Chinese} {EFL} Learners' Use of Adverbial Connectors and Its Implications for Building a Language-Based Learning Environment", journal = j-TALLIP, volume = "20", number = "5", pages = "85:1--85:16", month = jun, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3457987", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3457987", abstract = "This research adopts the methodology of corpus-based analysis and contrastive interlanguage analysis (CIA), using three corpora as the data source to analyze the adverbial connectors used by Chinese EFL (English as a foreign language) learners (i.e., \ldots{}).", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "85", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Deng:2021:CPC, author = "Yongliang Deng and Hua Zhang", title = "Configurational Path to {Chinese} Reading Stickiness of Digital Library", journal = j-TALLIP, volume = "20", number = "5", pages = "86:1--86:18", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3459092", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3459092", abstract = "Attracting and retaining readers in an increasingly competitive environment is an urgent problem for digital libraries of original literature. However, few empirical studies address online reading stickiness, particularly the factors affecting the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "86", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Javed:2021:BSS, author = "Abdul Rehman Javed and Saif Ur Rehman and Mohib Ullah Khan and Mamoun Alazab and Habib Ullah Khan", title = "{Betalogger}: Smartphone Sensor-based Side-channel Attack Detection and Text Inference Using Language Modeling and Dense {MultiLayer} Neural Network", journal = j-TALLIP, volume = "20", number = "5", pages = "87:1--87:17", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3460392", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3460392", abstract = "With the recent advancement of smartphone technology in the past few years, smartphone usage has increased on a tremendous scale due to its portability and ability to perform many daily life tasks. As a result, smartphones have become one of the most \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "87", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lavanya:2021:MRS, author = "R. Lavanya and B. 
Bharathi", title = "Movie Recommendation System to Solve Data Sparsity Using Collaborative Filtering Approach", journal = j-TALLIP, volume = "20", number = "5", pages = "88:1--88:14", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3459091", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3459091", abstract = "With the increase in numbers of multimedia technologies around us, movies and videos on social media and OTT platforms are growing, making it confusing for users to decide which one to watch for. For this, movie recommendation systems are widely used. It \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "88", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ma:2021:IAV, author = "Jun Ma and Hongzhi Yu and Yan Xu and Kaiying Deng", title = "An Investigational Approach for Vowels of the {Salar} Language Based on a Database of Speech Acoustic Parameters", journal = j-TALLIP, volume = "20", number = "5", pages = "89:1--89:10", month = sep, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3459927", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3459927", abstract = "According to relevant specifications, this article divides, marks, and extracts the acquired speech signals of the Salar language, and establishes the speech acoustic parameter database of the Salar language. Then, the vowels of the Salar language are \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "89", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumar:2021:SAU, author = "Akshi Kumar and Victor Hugo C. Albuquerque", title = "Sentiment Analysis Using {XLM-R} Transformer and Zero-shot Transfer Learning on Resource-poor {Indian} Language", journal = j-TALLIP, volume = "20", number = "5", pages = "90:1--90:13", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3461764", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3461764", abstract = "Sentiment analysis on social media relies on comprehending the natural language and using a robust machine learning technique that learns multiple layers of representations or features of the data and produces state-of-the-art prediction results. The \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "90", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhou:2021:NRO, author = "Zhou Zhou and Fangmin Li and Shuiqiao Yang", title = "A Novel Resource Optimization Algorithm Based on Clustering and Improved Differential Evolution Strategy Under a Cloud Environment", journal = j-TALLIP, volume = "20", number = "5", pages = "91:1--91:15", month = jul, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3462761", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Oct 5 08:44:30 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3462761", abstract = "Resource optimization algorithm based on clustering and improved differential evolution strategy, as a new global optimized algorithm, has wide applications in language translation, language processing, document understanding, cloud computing, and edge \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "91", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tan:2021:BBT, author = "Minghuan Tan and Jing Jiang and Bing Tian Dai", title = "A {BERT}-Based Two-Stage Model for {Chinese Chengyu} Recommendation", journal = j-TALLIP, volume = "20", number = "6", pages = "92:1--92:18", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3453185", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3453185", abstract = "In Chinese, Chengyu are fixed phrases consisting of four characters. 
As a type of idioms, their meanings usually cannot be derived from their component characters. In this article, we study the task of recommending a Chengyu given a textual context. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "92", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Xiang:2021:RCL, author = "Lu Xiang and Junnan Zhu and Yang Zhao and Yu Zhou and Chengqing Zong", title = "Robust Cross-lingual Task-oriented Dialogue", journal = j-TALLIP, volume = "20", number = "6", pages = "93:1--93:24", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3457571", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3457571", abstract = "Cross-lingual dialogue systems are increasingly important in e-commerce and customer service due to the rapid progress of globalization. In real-world system deployment, machine translation (MT) services are often used before and after the dialogue system \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "93", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Premjith:2021:DLA, author = "B. Premjith and K. P. 
Soman", title = "Deep Learning Approach for the Morphological Synthesis in {Malayalam} and {Tamil} at the Character Level", journal = j-TALLIP, volume = "20", number = "6", pages = "94:1--94:17", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3457976", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3457976", abstract = "Morphological synthesis is one of the main components of Machine Translation (MT) frameworks, especially when any one or both of the source and target languages are morphologically rich. Morphological synthesis is the process of combining two words or two \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "94", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mundotiya:2021:LRB, author = "Rajesh Kumar Mundotiya and Manish Kumar Singh and Rahul Kapur and Swasti Mishra and Anil Kumar Singh", title = "Linguistic Resources for {Bhojpuri}, {Magahi}, and {Maithili}: Statistics about Them, Their Similarity Estimates, and Baselines for Three Applications", journal = j-TALLIP, volume = "20", number = "6", pages = "95:1--95:37", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3458250", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3458250", abstract = "Corpus preparation for low-resource languages and for development of human language technology to analyze or computationally process them is a laborious task, primarily due to the unavailability of expert linguists who are native speakers of these 
\ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "95", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Telemala:2021:ETL, author = "Joseph P. Telemala and Hussein Suleman", title = "Exploring Topic-language Preferences in Multilingual {Swahili} Information Retrieval in {Tanzania}", journal = j-TALLIP, volume = "20", number = "6", pages = "96:1--96:30", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3458671", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3458671", abstract = "Habitual switching of languages is a common behaviour among polyglots when searching for information on the Web. Studies in information retrieval (IR) and multilingual information retrieval (MLIR) suggest that part of the reason for such regular switching \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "96", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tian:2021:RRO, author = "Yaolin Tian and Weize Gao and Xuxing Liu and Shanxiong Chen and Bofeng Mo", title = "The Research on Rejoining of the Oracle Bone Rubbings Based on Curve Matching", journal = j-TALLIP, volume = "20", number = "6", pages = "97:1--97:17", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3460393", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3460393", abstract = "The rejoining of oracle bone rubbings is a fundamental topic for oracle research. However, it is a tough task to reassemble severely broken oracle bone rubbings because of detail loss in manual labeling, the great time consumption of rejoining, and the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "97", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Munir:2021:NUS, author = "Kashif Munir and Hai Zhao and Zuchao Li", title = "Neural Unsupervised Semantic Role Labeling", journal = j-TALLIP, volume = "20", number = "6", pages = "98:1--98:16", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3461613", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3461613", abstract = "The task of semantic role labeling (SRL) is dedicated to finding the predicate-argument structure. 
Previous works on SRL are mostly supervised and do not consider the difficulty in labeling each example which can be very expensive and time-consuming. In \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "98", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Saha:2021:UDM, author = "Tulika Saha and Dhawal Gupta and Sriparna Saha and Pushpak Bhattacharyya", title = "A Unified Dialogue Management Strategy for Multi-intent Dialogue Conversations in Multiple Languages", journal = j-TALLIP, volume = "20", number = "6", pages = "99:1--99:22", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3461763", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3461763", abstract = "Building Virtual Agents capable of carrying out complex queries of the user involving multiple intents of a domain is quite a challenge, because it demands that the agent manages several subtasks simultaneously. This article presents a universal Deep \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "99", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ni:2021:MDT, author = "Weijian Ni and Tong Liu and Qingtian Zeng and Nengfu Xie", title = "Mining Domain Terminologies Using Search Engine's Query Log", journal = j-TALLIP, volume = "20", number = "6", pages = "100:1--100:32", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3462327", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3462327", abstract = "Domain terminologies are a basic resource for various natural language processing tasks. To automatically discover terminologies for a domain of interest, most traditional approaches mostly rely on a domain-specific corpus given in advance; thus, the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "100", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Xu:2021:CDG, author = "Jun Xu and Zeyang Lei and Haifeng Wang and Zheng-Yu Niu and Hua Wu and Wanxiang Che and Jizhou Huang and Ting Liu", title = "Coherent Dialog Generation with Query Graph", journal = j-TALLIP, volume = "20", number = "6", pages = "101:1--101:23", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3462551", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3462551", abstract = "Learning to generate coherent and informative dialogs is an enduring challenge for open-domain conversation generation. 
Previous work leverage knowledge graph or documents to facilitate informative dialog generation, with little attention on dialog \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "101", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Joshi:2021:SSG, author = "Manju Lata Joshi and Nisheeth Joshi and Namita Mittal", title = "{SGATS}: Semantic Graph-based Automatic Text Summarization from {Hindi} Text Documents", journal = j-TALLIP, volume = "20", number = "6", pages = "102:1--102:32", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3464381", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3464381", abstract = "Creating a coherent summary of the text is a challenging task in the field of Natural Language Processing (NLP). Various Automatic Text Summarization techniques have been developed for abstractive as well as extractive summarization. This study focuses on \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "102", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Byambadorj:2021:NTM, author = "Zolzaya Byambadorj and Ryota Nishimura and Altangerel Ayush and Norihide Kitaoka", title = "Normalization of Transliterated {Mongolian} Words Using {Seq2Seq} Model with Limited Data", journal = j-TALLIP, volume = "20", number = "6", pages = "103:1--103:19", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3464361", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3464361", abstract = "The huge increase in social media use in recent years has resulted in new forms of social interaction, changing our daily lives. Due to increasing contact between people from different cultures as a result of globalization, there has also been an increase \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "103", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kaing:2021:TTP, author = "Hour Kaing and Chenchen Ding and Masao Utiyama and Eiichiro Sumita and Sethserey Sam and Sopheap Seng and Katsuhito Sudoh and Satoshi Nakamura", title = "Towards Tokenization and Part-of-Speech Tagging for {Khmer}: Data and Discussion", journal = j-TALLIP, volume = "20", number = "6", pages = "104:1--104:16", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3464378", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3464378", abstract = "As a highly analytic language, Khmer has considerable ambiguities in tokenization and part-of-speech (POS) tagging processing. This topic is investigated in this study. Specifically, a 20,000-sentence Khmer corpus with manual tokenization and POS-tagging \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "104", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tian:2021:NCM, author = "Xiuxia Tian and Can Li and Bo Zhao", title = "A Novel Classification Model {SA-MPCNN} for Power Equipment Defect Text", journal = j-TALLIP, volume = "20", number = "6", pages = "105:1--105:21", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3464380", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3464380", abstract = "The text classification of power equipment defect is of great significance to equipment health condition evaluation and power equipment maintenance decisions. Most of the existing classification methods do not sufficiently consider the semantic relation \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "105", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sen:2021:BGT, author = "Shibaprasad Sen and Ankan Bhattacharyya and Ram Sarkar and Kaushik Roy", title = "{BYANJON}: a Ground Truth Preparation System for Online Handwritten {Bangla} Documents", journal = j-TALLIP, volume = "20", number = "6", pages = "106:1--106:16", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3464379", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3464379", abstract = "The work reported in this article deals with the ground truth generation scheme for online handwritten Bangla documents at text-line, word, and stroke levels. The aim of the proposed scheme is twofold: firstly, to build a document level database so that \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "106", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Maimaiti:2021:IDA, author = "Mieradilijiang Maimaiti and Yang Liu and Huanbo Luan and Zegao Pan and Maosong Sun", title = "Improving Data Augmentation for Low-Resource {NMT} Guided by {POS}-Tagging and Paraphrase Embedding", journal = j-TALLIP, volume = "20", number = "6", pages = "107:1--107:21", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3464427", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3464427", abstract = "Data augmentation is an approach for several text generation tasks. Generally, in the machine translation paradigm, mainly in low-resource language scenarios, many data augmentation methods have been proposed. The most used approaches for generating \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "107", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Vo:2021:SIS, author = "Tham Vo", title = "{SE4ExSum}: an Integrated Semantic-aware Neural Approach with Graph Convolutional Network for Extractive Text Summarization", journal = j-TALLIP, volume = "20", number = "6", pages = "108:1--108:22", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3464426", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3464426", abstract = "Recently, advanced techniques in deep learning such as recurrent neural network (GRU, LSTM and Bi-LSTM) and auto-encoding (attention-based transformer and BERT) have achieved great successes in multiple application domains including text summarization. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "108", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2021:MCS, author = "Lei Liu and Hao Chen and Yinghong Sun", title = "A Multi-Classification Sentiment Analysis Model of {Chinese} Short Text Based on Gated Linear Units and Attention Mechanism", journal = j-TALLIP, volume = "20", number = "6", pages = "109:1--109:13", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3464425", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3464425", abstract = "Sentiment analysis of social media texts has become a research hotspot in information processing. 
Sentiment analysis methods based on the combination of machine learning and sentiment lexicon need to select features. Selected emotional features are often \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "109", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Niwa:2021:CCR, author = "Ayana Niwa and Naoaki Okazaki and Kohei Wakimoto and Keisuke Nishiguchi and Masataka Mouri", title = "Construction of a Corpus of Rhetorical Devices in Slogans and Structural Analysis of Antitheses", journal = j-TALLIP, volume = "20", number = "6", pages = "110:1--110:26", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3465218", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3465218", abstract = "An advertising slogan is a sentence that expresses a product or a work of art in a straightforward manner and is used for advertising and publicity. Moving the consumer's mind and attracting their interest can significantly influence sales. Although \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "110", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Shin:2021:EEA, author = "Jaehun Shin and Wonkee Lee and Byung-Hyun Go and Baikjin Jung and Youngkil Kim and Jong-Hyeok Lee", title = "Exploration of Effective Attention Strategies for Neural Automatic Post-editing with Transformer", journal = j-TALLIP, volume = "20", number = "6", pages = "111:1--111:17", month = nov, year = "2021", CODEN = "????", DOI = "https://doi.org/10.1145/3465383", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 16 05:29:47 MDT 2021", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3465383", abstract = "Automatic post-editing (APE) is the study of correcting translation errors in the output of an unknown machine translation (MT) system and has been considered as a method of improving translation quality without any modification to conventional MT \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "111", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumar:2022:ISI, author = "Akshi Kumar and Christian Esposito and Dimitrios A. Karras", title = "Introduction to Special Issue on Misinformation, Fake News and Rumor Detection in Low-Resource Languages", journal = j-TALLIP, volume = "21", number = "1", pages = "1e:1--1e:3", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3505588", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3505588", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. 
Lang. Inf. Process.", articleno = "1e", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sangwan:2022:DCD, author = "Saurabh R. Sangwan and M. P. S. Bhatia", title = "Denigrate Comment Detection in Low-Resource {Hindi} Language Using Attention-Based Residual Networks", journal = j-TALLIP, volume = "21", number = "1", pages = "1:1--1:14", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3431729", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3431729", abstract = "Cyberspace has been recognized as a conducive environment for use of various hostile, direct, and indirect behavioural tactics to target individuals or groups. Denigration is one of the most frequently used cyberbullying ploys to actively damage, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "1", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bhowmick:2022:MDF, author = "Rajat Subhra Bhowmick and Isha Ganguli and Jayanta Paul and Jaya Sil", title = "A Multimodal Deep Framework for Derogatory Social Media Post Identification of a Recognized Person", journal = j-TALLIP, volume = "21", number = "1", pages = "2:1--2:19", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3447651", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3447651", abstract = "In today's era of digitization, social media platforms play a significant role in networking and influencing the perception of the general population. Social network sites have recently been used to carry out harmful attacks against individuals, including \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "2", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jain:2022:FNC, author = "Rachna Jain and Deepak Kumar Jain and Dharana and Nitika Sharma", title = "Fake News Classification: a Quantitative Research Description", journal = j-TALLIP, volume = "21", number = "1", pages = "3:1--3:17", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3447650", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3447650", abstract = "Social media can render content circulating to reach millions with a knack to influence people, despite the questionable authenticity of the facts. Internet sources are the most convenient and easy approach to obtain any information these days. Fake news \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "3", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ranasinghe:2022:MOL, author = "Tharindu Ranasinghe and Marcos Zampieri", title = "Multilingual Offensive Language Identification for Low-resource Languages", journal = j-TALLIP, volume = "21", number = "1", pages = "4:1--4:13", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3457610", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3457610", abstract = "Offensive content is pervasive in social media and a reason for concern to companies and government organizations. 
Several studies have been recently published investigating methods to detect the various forms of such content (e.g., hate speech, \ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "4", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Das:2022:DLA, author = "Soma Das and Pooja Rai and Sanjay Chatterji", title = "Deep Level Analysis of Legitimacy in {Bengali} News Sentences", journal = j-TALLIP, volume = "21", number = "1", pages = "5:1--5:18", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3459928", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3459928", abstract = "The tremendous increase in the growth of misinformation in news articles has the potential threat for the adverse effects on society. Hence, the detection of misinformation in news data has become an appealing research area. The task of annotating and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "5", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Saeed:2022:ECE, author = "Ramsha Saeed and Hammad Afzal and Haider Abbas and Maheen Fatima", title = "Enriching Conventional Ensemble Learner with Deep Contextual Semantics to Detect Fake News in {Urdu}", journal = j-TALLIP, volume = "21", number = "1", pages = "6:1--6:19", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3461614", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3461614", abstract = "Increased connectivity has contributed greatly in facilitating rapid access to information and reliable communication. However, the uncontrolled information dissemination has also resulted in the spread of fake news. Fake news might be spread by a group \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "6", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Gumaei:2022:EAR, author = "Abdu Gumaei and Mabrook S. Al-Rakhami and Mohammad Mehedi Hassan and Victor Hugo C. 
{De Albuquerque} and David Camacho", title = "An Effective Approach for Rumor Detection of {Arabic} Tweets Using {eXtreme} Gradient Boosting Method", journal = j-TALLIP, volume = "21", number = "1", pages = "7:1--7:16", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3461697", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3461697", abstract = "Twitter is currently one of the most popular microblogging platforms allowing people to post short messages, news, thoughts, and so on. The Twitter user community is growing very fast. It has an average of 328 million active accounts today, making it one \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "7", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Dhall:2022:BBF, author = "Sakshi Dhall and Ashutosh Dhar Dwivedi and Saibal K. Pal and Gautam Srivastava", title = "Blockchain-based Framework for Reducing Fake or Vicious News Spread on Social Media\slash Messaging Platforms", journal = j-TALLIP, volume = "21", number = "1", pages = "8:1--8:33", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3467019", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3467019", abstract = "With social media becoming the most frequently used mode of modern-day communications, the propagation of fake or vicious news through such modes of communication has emerged as a serious problem. 
The scope of the problem of fake or vicious news may range \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "8", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{De:2022:TBA, author = "Arkadipta De and Dibyanayan Bandyopadhyay and Baban Gain and Asif Ekbal", title = "A Transformer-Based Approach to Multilingual Fake News Detection in Low-Resource Languages", journal = j-TALLIP, volume = "21", number = "1", pages = "9:1--9:20", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3472619", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3472619", abstract = "Fake news classification is one of the most interesting problems that has attracted huge attention to the researchers of artificial intelligence, natural language processing, and machine learning (ML). Most of the current works on fake news detection are \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "9", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Samadi:2022:PFN, author = "Mohammadreza Samadi and Maryam Mousavian and Saeedeh Momtazi", title = "{Persian} Fake News Detection: Neural Representation and Classification at Word and Text Levels", journal = j-TALLIP, volume = "21", number = "1", pages = "10:1--10:11", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3472620", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3472620", abstract = "Nowadays, broadcasting news on social media and websites has grown at a swifter pace, which has had negative impacts on both the general public and governments; hence, this has urged us to build a fake news detection system. Contextualized word embeddings \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "10", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Najadat:2022:DAS, author = "Hassan Najadat and Mohammad A. 
Alzubaidi and Islam Qarqaz", title = "Detecting {Arabic} Spam Reviews in Social Networks Based on Classification Algorithms", journal = j-TALLIP, volume = "21", number = "1", pages = "11:1--11:13", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3476115", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3476115", abstract = "Reviews or comments that users leave on social media have great importance for companies and business entities. New product ideas can be evaluated based on customer reactions. However, this use of social media is complicated by those who post spam on \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "11", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jahanbakhsh-Nagadeh:2022:DCB, author = "Zoleikha Jahanbakhsh-Nagadeh and Mohammad-Reza Feizi-Derakhshi and Arash Sharifi", title = "A Deep Content-Based Model for {Persian} Rumor Verification", journal = j-TALLIP, volume = "21", number = "1", pages = "12:1--12:29", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3487289", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3487289", abstract = "During the development of social media, there has been a transformation in social communication. Despite their positive applications in social interactions and news spread, it also provides an ideal platform for spreading rumors. Rumors can endanger the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "12", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Alam:2022:RUP, author = "Mehreen Alam and Sibt {Ul Hussain}", title = "{Roman--Urdu--Parl}: {Roman--Urdu} and {Urdu} Parallel Corpus for {Urdu} Language Understanding", journal = j-TALLIP, volume = "21", number = "1", pages = "13:1--13:20", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3464424", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3464424", abstract = "Availability of corpora is a basic requirement for conducting research in a particular language. Unfortunately, for a morphologically rich language like Urdu, despite being used by over a 100 million people around the globe, the dearth of corpora is a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "13", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nassif:2022:EES, author = "Ali Bou Nassif and Abdollah Masoud Darya and Ashraf Elnagar", title = "Empirical Evaluation of Shallow and Deep Learning Classifiers for {Arabic} Sentiment Analysis", journal = j-TALLIP, volume = "21", number = "1", pages = "14:1--14:25", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3466171", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3466171", abstract = "This work presents a detailed comparison of the performance of deep learning models such as convolutional neural networks, long short-term memory, gated recurrent units, their hybrids, and a selection of shallow learning classifiers for sentiment analysis \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "14", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Phukon:2022:SEU, author = "Bornali Phukon and Akash Anil and Sanasam Ranbir Singh and Priyankoo Sarmah", title = "Synonymy Expansion Using Link Prediction Methods: a Case Study of {Assamese} {WordNet}", journal = j-TALLIP, volume = "21", number = "1", pages = "15:1--15:21", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3467966", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3467966", abstract = "WordNets built for low-resource languages, such as Assamese, often use the expansion methodology. This may result in missing lexical entries and missing synonymy relations. As the Assamese WordNet is also built using the expansion method, using the Hindi \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "15", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Eddine:2022:NCE, author = "Meftah Mohammed Charaf Eddine", title = "A New Concept of Electronic Text Based on Semantic Coding System for Machine Translation", journal = j-TALLIP, volume = "21", number = "1", pages = "16:1--16:16", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3469655", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3469655", abstract = "In the field of machine translation of texts, the ambiguity in both lexical (dictionary) and structural aspects is still one of the difficult problems. Researchers in this field use different approaches, the most important of which is machine learning in \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "16", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Xiang:2022:EGN, author = "Yan Xiang and Zhengtao Yu and Junjun Guo and Yuxin Huang and Yantuan Xian", title = "Event Graph Neural Network for Opinion Target Classification of Microblog Comments", journal = j-TALLIP, volume = "21", number = "1", pages = "17:1--17:13", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3469725", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3469725", abstract = "Opinion target classification of microblog comments is one of the most important tasks for public opinion analysis about an event. 
Due to the high cost of manual labeling, opinion target classification is generally considered as a weak-supervised task. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "17", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Turan:2022:CIA, author = "Erhan Turan and Umut Orhan", title = "Confidence Indexing of Automated Detected Synsets: a Case Study on Contemporary {Turkish} Dictionary", journal = j-TALLIP, volume = "21", number = "1", pages = "18:1--18:19", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3469724", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3469724", abstract = "In this study, a novel confidence indexing algorithm is proposed to minimize human labor in controlling the reliability of automatically extracted synsets from a non-machine-readable monolingual dictionary. Contemporary Turkish Dictionary of Turkish \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "18", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Baruah:2022:LRN, author = "Rupjyoti Baruah and Rajesh Kumar Mundotiya and Anil Kumar Singh", title = "Low Resource Neural Machine Translation: {Assamese} to\slash from Other {Indo--Aryan} ({Indic}) Languages", journal = j-TALLIP, volume = "21", number = "1", pages = "19:1--19:32", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3469721", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3469721", abstract = "Machine translation (MT) systems have been built using numerous different techniques for bridging the language barriers. These techniques are broadly categorized into approaches like Statistical Machine Translation (SMT) and Neural Machine Translation (\ldots{})", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "19", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Fadel:2022:NAT, author = "Ali Fadel and Ibraheem Tuffaha and Mahmoud Al-Ayyoub", title = "Neural {Arabic} Text Diacritization: State-of-the-Art Results and a Novel Approach for {Arabic} {NLP} Downstream Tasks", journal = j-TALLIP, volume = "21", number = "1", pages = "20:1--20:25", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3470849", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3470849", abstract = "In this work, we present several deep learning models for the automatic diacritization of Arabic text. Our models are built using two main approaches, viz. Feed-Forward Neural Network (FFNN) and Recurrent Neural Network (RNN), with several enhancements \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "20", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kulkarni:2022:SAH, author = "Dhanashree S. Kulkarni and Sunil S. 
Rodd", title = "Sentiment Analysis in {Hindi} --- a Survey on the State-of-the-art Techniques", journal = j-TALLIP, volume = "21", number = "1", pages = "21:1--21:46", month = jan, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3469722", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Jan 31 07:33:24 MST 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3469722", abstract = "Sentiment Analysis (SA) has been a core interest in the field of text mining research, dealing with computational processing of sentiments, views, and subjective nature of the text. Due to the availability of extensive web-based data in Indian languages \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "21", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yu:2022:ICV, author = "Zhiqiang Yu and Zhengtao Yu and Yantuan Xian and Yuxin Huang and Junjun Guo", title = "Improving {Chinese--Vietnamese} Neural Machine Translation with Linguistic Differences", journal = j-TALLIP, volume = "21", number = "2", pages = "22:1--22:12", month = mar, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3477536", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Mar 28 11:35:36 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3477536", abstract = "We present a simple, efficient data augmentation approach for boosting Chinese-Vietnamese neural machine translation performance by leveraging the linguistic difference between the two languages. We first define the formalized representation of modifier \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "22", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Halabi:2022:INA, author = "Dana Halabi and Ebaa Fayyoumi and Arafat Awajan", title = "{I3rab}: a New {Arabic} Dependency Treebank Based on {Arabic} Grammatical Theory", journal = j-TALLIP, volume = "21", number = "2", pages = "23:1--23:32", month = mar, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3472295", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Mar 28 11:35:36 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3472295", abstract = "Treebanks are valuable linguistic resources that include the syntactic structure of a language sentence in addition to part-of-speech tags and morphological features. They are mainly utilized in modeling statistical parsers. Although the statistical \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "23", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yang:2022:ASN, author = "Haitong Yang and Guangyou Zhou and Tingting He", title = "Adversarial Separation Network for Text Style Transfer", journal = j-TALLIP, volume = "21", number = "2", pages = "24:1--24:14", month = mar, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3472621", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Mon Mar 28 11:35:36 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3472621", abstract = "This article considers the task of text style transfer: transforming a specific style of sentence into another while preserving its style-independent content. 
A dominate approach to text style transfer is to learn a good content factor of text, define a \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Fatima:2022:DCL,
  author =       "Ghazeefa Fatima and Rao Muhammad Adeel Nawab and Muhammad Salman Khan and Ali Saeed",
  title =        "Developing a Cross-lingual Semantic Word Similarity Corpus for {English--Urdu} Language Pair",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "25:1--25:16",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3472618",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3472618",
  abstract =     "Semantic word similarity is a quantitative measure of how much two words are contextually similar. Evaluation of semantic word similarity models requires a benchmark corpus. However, despite the millions of speakers and the large digital text of the Urdu \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Muneer:2022:CLT,
  author =       "Iqra Muneer and Rao Muhammad Adeel Nawab",
  title =        "Cross-lingual Text Reuse Detection Using Translation Plus Monolingual Analysis for {English--Urdu} Language Pair",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "26:1--26:18",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3473331",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3473331",
  abstract =     "Cross-Lingual Text Reuse Detection (CLTRD) has recently attracted the attention of the research community due to a large amount of digital text readily available for reuse in multiple languages through online digital repositories. In addition, efficient \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Xu:2022:LRL,
  author =       "Fan Xu and Yangjie Dan and Keyu Yan and Yong Ma and Mingwen Wang",
  title =        "Low-Resource Language Discrimination toward {Chinese} Dialects with Transfer Learning and Data Augmentation",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "27:1--27:21",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3473499",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3473499",
  abstract =     "Chinese dialects discrimination is a challenging natural language processing task due to scarce annotation resource. In this article, we develop a novel Chinese dialects discrimination framework with transfer learning and data augmentation (CDDTLDA) in \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Rana:2022:UAS,
  author =       "Toqir A.
Rana and Kiran Shahzadi and Tauseef Rana and Ahsan Arshad and Mohammad Tubishat",
  title =        "An Unsupervised Approach for Sentiment Analysis on Social Media Short Text Classification in {Roman Urdu}",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "28:1--28:16",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474119",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474119",
  abstract =     "During the last two decades, sentiment analysis, also known as opinion mining, has become one of the most explored research areas in Natural Language Processing (NLP) and data mining. Sentiment analysis focuses on the sentiments or opinions of consumers \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Mukherjee:2022:URL,
  author =       "Jayati Mukherjee and Swapan K. Parui and Utpal Roy",
  title =        "An Unsupervised and Robust Line and Word Segmentation Method for Handwritten and Degraded Printed Document",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "29:1--29:31",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474118",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474118",
  abstract =     "Segmentation of text lines and words in an unconstrained handwritten or a machine-printed degraded document is a challenging document analysis problem due to the heterogeneity in the document structure.
Often there is un-even skew between the lines and \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Mukta:2022:CGB,
  author =       "Md. Saddam Hossain Mukta and Md. Adnanul Islam and Faisal Ahamed Khan and Afjal Hossain and Shuvanon Razik and Shazzad Hossain and Jalal Mahmud",
  title =        "A Comprehensive Guideline for {Bengali} Sentiment Annotation",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "30:1--30:19",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474363",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474363",
  abstract =     "Sentiment Analysis (SA) is a Natural Language Processing (NLP) and an Information Extraction (IE) task that primarily aims to obtain the writer's feelings expressed in positive or negative by analyzing a large number of documents. SA is also widely \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Hu:2022:GFQ,
  author =       "Yue Hu and Haitong Yang and Guangyou Zhou and Jimmy Xiangji Huang",
  title =        "Generating Factoid Questions with Question Type Enhanced Representation and Attention-based Copy Mechanism",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "31:1--31:18",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474555",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474555",
  abstract =     "Question generation over knowledge bases is an important research topic. How to deal with rare and low-frequency words in traditional generation models is a key challenge for question generation. Although the copy mechanism provides significant \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Singh:2022:CSH,
  author =       "Pawan Kumar Singh and Ram Sarkar and Ajith Abraham and Mita Nasipuri",
  title =        "A Case Study on Handwritten {Indic} Script Classification: Benchmarking of the Results at Page, Block, Text-line, and Word Levels",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "32:1--32:36",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476102",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476102",
  abstract =     "Handwritten script classification is still considered as a challenging research problem in the domain of document image analysis. Although some research attempts have been made by the researchers for solving the challenging issues, a comprehensive \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Gu:2022:MTF,
  author =       "Xiaoqing Gu and Kaijian Xia and Yizhang Jiang and Alireza Jolfaei",
  title =        "Multi-task Fuzzy Clustering-Based Multi-task {TSK} Fuzzy System for Text Sentiment Classification",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "33:1--33:24",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476103",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476103",
  abstract =     "Text sentiment classification is an important technology for natural language processing. A fuzzy system is a strong tool for processing imprecise or ambiguous data, and it can be used for text sentiment analysis. This article proposes a new formulation \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Sarwar:2022:UWU,
  author =       "Raheem Sarwar and Saeed-Ul Hassan",
  title =        "{UrduAI}: Writeprints for {Urdu} Authorship Identification",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "34:1--34:18",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476467",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476467",
  abstract =     "The authorship identification task aims at identifying the original author of an anonymous text sample from a set of candidate authors.
It has several application domains such as digital text forensics and information retrieval. These application domains \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Fatima:2022:SAW,
  author =       "Tayyaba Fatima and Raees {Ul Islam} and Muhammad Waqas Anwar and M. Hasan Jamal and M. Tayyab Chaudhry and Zeeshan Gillani",
  title =        "{STEMUR}: an Automated Word Conflation Algorithm for the {Urdu} Language",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "35:1--35:20",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476226",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476226",
  abstract =     "Stemming is a common word conflation method that perceives stems embedded in the words and decreases them to their stem (root) by conflating all the morphologically related terms into a single term, without doing a complete morphological analysis. This \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Hiraoka:2022:RNH,
  author =       "Tatsuya Hiraoka and Sho Takase and Kei Uchiumi and Atsushi Keyaki and Naoaki Okazaki",
  title =        "Recurrent Neural Hidden {Markov} Model for High-order Transition",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "36:1--36:15",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476511",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476511",
  abstract =     "We propose a method to pay attention to high-order relations among latent states to improve the conventional HMMs that focus only on the latest latent state, since they assume Markov property. To address the high-order relations, we apply an RNN to each \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{S:2022:IWS,
  author =       "Sruthi S. and B. Kannan and Binu Paul",
  title =        "Improved Word Sense Determination in {Malayalam} using Latent {Dirichlet} Allocation and Semantic Features",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "37:1--37:11",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476978",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476978",
  abstract =     "Recent years have witnessed phenomenal developments worldwide in the field of NLP.
But developments in Indian regional languages are very few compared to them. This work is a step towards the construction of a target word sense disambiguation system in \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Saeed:2022:IFD,
  author =       "Ali Saeed and Rao Muhammad Adeel Nawab and Mark Stevenson",
  title =        "Investigating the Feasibility of Deep Learning Methods for {Urdu} Word Sense Disambiguation",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "38:1--38:16",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3477578",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3477578",
  abstract =     "Word Sense Disambiguation (WSD), the process of automatically identifying the correct meaning of a word used in a given context, is a significant challenge in Natural Language Processing. A range of approaches to the problem has been explored by the \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Katyayan:2022:DAR,
  author =       "Pragya Katyayan and Nisheeth Joshi",
  title =        "Development of Automatic Rule-based Semantic Tagger and {Karaka} Analyzer for {Hindi}",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "39:1--39:25",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3479155",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3479155",
  abstract =     "Hindi is the third most-spoken language in the world (615 million speakers) and has the fourth highest native speakers (341 million). It is an inflectionally rich and relatively free word-order language with an immense vocabulary set. Despite being such a \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Abbad:2022:SED,
  author =       "Hamza Abbad and Shengwu Xiong",
  title =        "Simple Extensible Deep Learning Model for Automatic {Arabic} Diacritization",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "40:1--40:16",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3480938",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3480938",
  abstract =     "Automatic diacritization is an Arabic natural language processing topic based on the sequence labeling task where the labels are the diacritics and the letters are the sequence elements. A letter can have from zero up to two diacritics. The dataset used \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Huang:2022:DAW,
  author =       "Kaiyu Huang and Keli Xiao and Fengran Mo and Bo Jin and Zhuang Liu and Degen Huang",
  title =        "Domain-Aware Word Segmentation for {Chinese} Language: a Document-Level Context-Aware Model",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "41:1--41:16",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3481298",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3481298",
  abstract =     "Word segmentation is an essential and challenging task in natural language processing, especially for the Chinese language due to its high linguistic complexity. Existing methods for Chinese word segmentation, including statistical machine learning \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Bai:2022:UPT,
  author =       "Guirong Bai and Shizhu He and Kang Liu and Jun Zhao",
  title =        "Using Pre-trained Language Model to Enhance Active Learning for Sentence Matching",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "2",
  pages =        "42:1--42:19",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3480937",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Mon Mar 28 11:35:36 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3480937",
  abstract =     "Active learning is an effective method to substantially alleviate the problem of expensive annotation cost for data-driven models. Recently, pre-trained language models have been demonstrated to be powerful for learning language representations. In this \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Al-Shatnawi:2022:AHW,
  author =       "Atallah Mahmoud Al-Shatnawi and Faisal Al-Saqqar and Alireza Souri",
  title =        "{Arabic} Handwritten Word Recognition Based on Stationary Wavelet Transform Technique using Machine Learning",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "43:1--43:21",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474391",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474391",
  abstract =     "This paper is aimed at improving the performance of the word recognition system (WRS) of handwritten Arabic text by extracting features in the frequency domain using the Stationary Wavelet Transform (SWT) method using machine learning, which is a wavelet \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Nazir:2022:AAR,
  author =       "Zulqarnain Nazir and Khurram Shahzad and Muhammad Kamran Malik and Waheed Anwar and Imran Sarwar Bajwa and Khawar Mehmood",
  title =        "Authorship Attribution for a Resource Poor Language --- {Urdu}",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "44:1--44:23",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3487061",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3487061",
  abstract =     "Authorship attribution refers to examining the writing style of authors to determine the likelihood of the original author of a document from a given set of potential authors. Due to the wide range of authorship attribution applications, a plethora of \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Sheikhaei:2022:JTL,
  author =       "Mohammad Sadegh Sheikhaei and Hasan Zafari and Yuan Tian",
  title =        "Joined Type Length Encoding for Nested Named Entity Recognition",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "45:1--45:23",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3487057",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3487057",
  abstract =     "In this article, we propose a new encoding scheme for named entity recognition (NER) called Joined Type-Length encoding (JoinedTL). Unlike most existing named entity encoding schemes, which focus on flat entities, JoinedTL can label nested named entities \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Li:2022:DVC,
  author =       "Mei Li and Jiajun Zhang and Xiang Lu and Chengqing Zong",
  title =        "Dual-View Conditional Variational Auto-Encoder for Emotional Dialogue Generation",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "46:1--46:18",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3481890",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3481890",
  abstract =     "Emotional dialogue generation aims to generate appropriate responses that are content relevant with the query and emotion consistent with the given emotion tag. Previous work mainly focuses on incorporating emotion information into the sequence to \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Raval:2022:IDL,
  author =       "Deepang Raval and Vyom Pathak and Muktan Patel and Brijesh Bhatt",
  title =        "Improving Deep Learning based Automatic Speech Recognition for {Gujarati}",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "47:1--47:18",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3483446",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3483446",
  abstract =     "We present a novel approach for improving the performance of an End-to-End speech recognition system for the Gujarati language.
We follow a deep learning-based approach that includes Convolutional Neural Network, Bi-directional Long Short Term Memory \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Jiang:2022:TTD,
  author =       "Shu Jiang and Zuchao Li and Hai Zhao and Bao-Liang Lu and Rui Wang",
  title =        "Tri-training for Dependency Parsing Domain Adaptation",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "48:1--48:17",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3488367",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3488367",
  abstract =     "In recent years, the research on dependency parsing focuses on improving the accuracy of the domain-specific (in-domain) test datasets and has made remarkable progress. However, there are innumerable scenarios in the real world that are not covered by the \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Mishra:2022:ECA,
  author =       "Santosh Kumar Mishra and Gaurav Rai and Sriparna Saha and Pushpak Bhattacharyya",
  title =        "Efficient Channel Attention Based Encoder-Decoder Approach for Image Captioning in {Hindi}",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "49:1--49:17",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3483597",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3483597",
  abstract =     "Image captioning refers to the process of generating a textual description that describes objects and activities present in a given image. It connects two fields of artificial intelligence, computer vision, and natural language processing. Computer vision \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "49",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Liao:2022:SLM,
  author =       "Xianwen Liao and Yongzhong Huang and Peng Yang and Lei Chen",
  title =        "A Statistical Language Model for Pre-Trained Sequence Labeling: a Case Study on {Vietnamese}",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "50:1--50:21",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3483524",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3483524",
  abstract =     "By defining the computable word segmentation unit and studying its probability characteristics, we establish an unsupervised statistical language model (SLM) for a new pre-trained sequence labeling framework in this article. The proposed SLM is an \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "50",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2022:TCC,
  author =       "Zhongguo Wang and Bao Zhang",
  title =        "Toxic Comment Classification Based on Bidirectional Gated Recurrent Unit and Convolutional Neural Network",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "51:1--51:12",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3488366",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3488366",
  abstract =     "For English toxic comment classification, this paper presents the model that combines Bi-GRU and CNN optimized by global average pooling (BG-GCNN) based on the bidirectional gated recurrent unit (Bi-GRU) and global pooling optimized convolution neural \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "51",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Sun:2022:OSR,
  author =       "Jian Sun and Yu Zhou and Chengqing Zong",
  title =        "One-Shot Relation Learning for Knowledge Graphs via Neighborhood Aggregation and Paths Encoding",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "52:1--52:19",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3484729",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3484729",
  abstract =     "The relation learning between two entities is an essential task in knowledge graph (KG) completion that has received much attention recently.
Previous work almost exclusively focused on relations widely seen in the original KGs, which means that enough \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "52",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Munir:2022:MAI,
  author =       "Kashif Munir and Hongxiao Bai and Hai Zhao and Junhan Zhao",
  title =        "Memorizing All for Implicit Discourse Relation Recognition",
  journal =      j-TALLIP,
  volume =       "21",
  number =       "3",
  pages =        "53:1--53:20",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3485016",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Apr 5 06:29:03 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3485016",
  abstract =     "Implicit discourse relation recognition is a challenging task due to the absence of the necessary informative clues from explicit connectives. An implicit discourse relation recognizer has to carefully tackle the semantic similarity of sentence pairs and \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "53",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Reddy:2022:FBA,
  author =       "A.
Pramod Reddy and Vijayarajan V.", title = "Fusion Based {AER} System Using Deep Learning Approach for Amplitude and Frequency Analysis", journal = j-TALLIP, volume = "21", number = "3", pages = "54:1--54:19", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3488369", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Apr 5 06:29:03 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3488369", abstract = "Automatic emotion recognition from Speech (AERS) systems based on acoustical analysis reveal that some emotional classes persist with ambiguity. This study employed an alternative method aimed at providing deep understanding into the amplitude-frequency, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "54", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2022:LJE, author = "Hu Zhang and Bangze Pan and Ru Li", title = "Legal Judgment Elements Extraction Approach with Law Article-aware Mechanism", journal = j-TALLIP, volume = "21", number = "3", pages = "55:1--55:15", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3485244", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Apr 5 06:29:03 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3485244", abstract = "Legal judgment elements extraction (LJEE) aims to identify the different judgment features from the fact description in legal documents automatically, which helps to improve the accuracy and interpretability of the judgment results. In real court rulings, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "55", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Rani:2022:ABS, author = "Sujata Rani and Parteek Kumar", title = "Aspect-based Sentiment Analysis using Dependency Parsing", journal = j-TALLIP, volume = "21", number = "3", pages = "56:1--56:19", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3485243", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Apr 5 06:29:03 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3485243", abstract = "In this paper, an aspect-based Sentiment Analysis (SA) system for Hindi is presented. The proposed system assigns a separate sentiment towards the different aspects of a sentence as well as it evaluates the overall sentiment expressed in a sentence. In \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "56", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ishraq:2022:TDU, author = "Mir Ragib Ishraq and Nitesh Khadka and Asif Mohammed Samir and M. 
Shahidur Rahman", title = "Towards Developing Uniform Lexicon Based Sorting Algorithm for Three Prominent {Indo--Aryan} Languages", journal = j-TALLIP, volume = "21", number = "3", pages = "57:1--57:20", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3488371", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Apr 5 06:29:03 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3488371", abstract = "Three different Indic/Indo-Aryan languages --- Bengali, Hindi and Nepali have been explored here in character level to find out similarities and dissimilarities. Having shared the same root, the Sanskrit, Indic languages bear common characteristics. That is \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "57", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Pandey:2022:HAS, author = "Shilpa Pandey and Gaurav Harit", title = "Handwritten Annotation Spotting in Printed Documents Using Top-Down Visual Saliency Models", journal = j-TALLIP, volume = "21", number = "3", pages = "58:1--58:25", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3485468", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Apr 5 06:29:03 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3485468", abstract = "In this article, we address the problem of localizing text and symbolic annotations on the scanned image of a printed document. Previous approaches have considered the task of annotation extraction as binary classification into printed and handwritten \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "58", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kang:2022:ELT, author = "Xiaomian Kang and Yang Zhao and Jiajun Zhang and Chengqing Zong", title = "Enhancing Lexical Translation Consistency for Document-Level Neural Machine Translation", journal = j-TALLIP, volume = "21", number = "3", pages = "59:1--59:21", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3485469", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Apr 5 06:29:03 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3485469", abstract = "Document-level neural machine translation (DocNMT) has yielded attractive improvements. In this article, we systematically analyze the discourse phenomena in Chinese-to-English translation, and focus on the most obvious ones, namely lexical translation \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "59", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Azmi:2022:LDR, author = "Aqil M. Azmi and Rehab M. Alnefaie and Hatim A. 
Aboalsamh", title = "Light Diacritic Restoration to Disambiguate Homographs in Modern {Arabic} Texts", journal = j-TALLIP, volume = "21", number = "3", pages = "60:1--60:14", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3486675", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Apr 5 06:29:03 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3486675", abstract = "Diacritic restoration (also known as diacritization or vowelization) is the process of inserting the correct diacritical markings into a text. Modern Arabic is typically written without diacritics, e.g., newspapers. This lack of diacritical markings often \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "60", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Guo:2022:ACD, author = "Aibo Guo and Xinyi Li and Ning Pang and Xiang Zhao", title = "Adversarial Cross-domain Community Question Retrieval", journal = j-TALLIP, volume = "21", number = "3", pages = "61:1--61:22", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3487291", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Apr 5 06:29:03 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3487291", abstract = "Community Q\&A forum is a special type of social media that provides a platform to raise questions and to answer them (both by forum participants), to facilitate online information sharing. Currently, community Q\&A forums in professional domains have \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "61", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2022:CSS, author = "Shaolei Wang and Zhongyuan Wang and Wanxiang Che and Sendong Zhao and Ting Liu", title = "Combining Self-supervised Learning and Active Learning for Disfluency Detection", journal = j-TALLIP, volume = "21", number = "3", pages = "62:1--62:25", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3487290", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Apr 5 06:29:03 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3487290", abstract = "Spoken language is fundamentally different from the written language in that it contains frequent disfluencies or parts of an utterance that are corrected by the speaker. Disfluency detection (removing these disfluencies) is desirable to clean the input \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "62", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Warjri:2022:PSP, author = "Sunita Warjri and Partha Pakray and Saralin A. 
Lyngdoh and Arnab Kumar Maji", title = "Part-of-Speech {(POS)} Tagging Using Deep Learning-Based Approaches on the Designed {Khasi} {POS} Corpus", journal = j-TALLIP, volume = "21", number = "3", pages = "63:1--63:24", month = may, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3488381", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Apr 5 06:29:03 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3488381", abstract = "Part-of-speech (POS) tagging is one of the research challenging fields in natural language processing (NLP). It requires good knowledge of a particular language with large amounts of data or corpora for feature engineering, which can lead to achieving a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "63", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Hastuti:2022:QLS, author = "Rochana Prih Hastuti and Yohanes Suyanto and Anny Kartika Sari", title = "{Q}-Learning for Shift-Reduce Parsing in {Indonesian} Tree-{LSTM}-Based Text Generation", journal = j-TALLIP, volume = "21", number = "4", pages = "64:1--64:15", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3490501", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3490501", abstract = "Tree-LSTM algorithm accommodates tree structure processing to extract information outside the linear sequence pattern. The use of Tree-LSTM in text generation problems requires the help of an external parser at each generation iteration. Developing a good \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. 
Lang. Inf. Process.", articleno = "64", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lee:2022:CEB, author = "Lung-Hao Lee and Jian-Hong Li and Liang-Chih Yu", title = "{Chinese} {EmoBank}: Building Valence-Arousal Resources for Dimensional Sentiment Analysis", journal = j-TALLIP, volume = "21", number = "4", pages = "65:1--65:18", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3489141", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3489141", abstract = "An increasing amount of research has recently focused on dimensional sentiment analysis that represents affective states as continuous numerical values on multiple dimensions, such as valence-arousal (VA) space. Compared to the categorical approach that \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "65", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2022:DDG, author = "Shanxiong Chen and Ye Yang and Xuxin Liu and Shiyu Zhu", title = "Dual Discriminator {GAN}: Restoring Ancient {Yi} Characters", journal = j-TALLIP, volume = "21", number = "4", pages = "66:1--66:23", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3490031", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3490031", abstract = "In China, the damage of ancient Yi books are serious.
Due to the lack of ancient Yi experts, the repairation of ancient Yi books is progressing very slowly. The artificial intelligence is successful in the field of image and text, so it is feasible for \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "66", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jana:2022:HDL, author = "Abhik Jana and Gopalakrishnan Venkatesh and Seid Muhie Yimam and Chris Biemann", title = "Hypernymy Detection for Low-resource Languages: a Study for {Hindi, Bengali, and Amharic}", journal = j-TALLIP, volume = "21", number = "4", pages = "67:1--67:21", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3490389", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3490389", abstract = "Numerous attempts for hypernymy relation (e.g., dog ``is-a'' animal) detection have been made for resourceful languages like English, whereas efforts made for low-resource languages are scarce primarily due to lack of gold-standard datasets and suitable \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "67", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mao:2022:LDM, author = "Zhuoyuan Mao and Chenhui Chu and Sadao Kurohashi", title = "Linguistically Driven Multi-Task Pre-Training for Low-Resource Neural Machine Translation", journal = j-TALLIP, volume = "21", number = "4", pages = "68:1--68:29", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3491065", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3491065", abstract = "In the present study, we propose novel sequence-to-sequence pre-training objectives for low-resource machine translation (NMT): Japanese-specific sequence to sequence (JASS) for language pairs involving Japanese as the source or target language, and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "68", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Abderrahim:2022:AWS, author = "Mohammed Alaeddine Abderrahim and Mohammed El-Amine Abderrahim", title = "{Arabic} Word Sense Disambiguation for Information Retrieval", journal = j-TALLIP, volume = "21", number = "4", pages = "69:1--69:19", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510451", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3510451", abstract = "In the context of using semantic resources for information retrieval, the relationship and distance between concepts are considered important for word sense disambiguation. In this article, we experiment with Conceptual Density and Random Walk with graph \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "69", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ma:2022:ERC, author = "Hongchao Ma and Zhongqing Wang and Xiabing Zhou and Guodong Zhou and Qinglei Zhou", title = "Emotion Recognition with Conversational Generation Transfer", journal = j-TALLIP, volume = "21", number = "4", pages = "70:1--70:17", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494532", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3494532", abstract = "Emotion recognition in conversation is one of the essential tasks of natural language processing. 
However, this task's annotation data is insufficient since such data is hard to collect and annotate. Meanwhile, there is large-scale data for conversational \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "70", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wu:2022:CEE, author = "Xiaohua Wu and Tengrui Wang and Youping Fan and Fangjian Yu", title = "{Chinese} Event Extraction via Graph Attention Network", journal = j-TALLIP, volume = "21", number = "4", pages = "71:1--71:12", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3494533", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3494533", abstract = "Event extraction plays an important role in natural language processing (NLP) applications, including question answering and information retrieval. Most of the previous state-of-the-art methods were lack of ability in capturing features in long range. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "71", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Cui:2022:IGD, author = "Yiming Cui and Wanxiang Che and Ziqing Yang and Ting Liu and Bing Qin and Shijin Wang and Guoping Hu", title = "Interactive Gated Decoder for Machine Reading Comprehension", journal = j-TALLIP, volume = "21", number = "4", pages = "72:1--72:19", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3501399", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3501399", abstract = "Owing to the availability of various large-scale Machine Reading Comprehension (MRC) datasets, building an effective model to extract passage spans for question answering has been well studied in previous works. However, in reality, there are some \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "72", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Husain:2022:IEP, author = "Fatemah Husain and Ozlem Uzuner", title = "Investigating the Effect of Preprocessing {Arabic} Text on Offensive Language and Hate Speech Detection", journal = j-TALLIP, volume = "21", number = "4", pages = "73:1--73:20", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3501398", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3501398", abstract = "Preprocessing of input text can play a key role in text classification by reducing dimensionality and removing unnecessary content. This study aims to investigate the impact of preprocessing on Arabic offensive language classification. We explore six \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "73", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Gogoi:2022:LLR, author = "Arjun Gogoi and Nomi Baruah", title = "A Lemmatizer for Low-resource Languages: {WSD} and Its Role in the {Assamese} Language", journal = j-TALLIP, volume = "21", number = "4", pages = "74:1--74:22", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3502157", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3502157", abstract = "The morphological variations of highly inflected languages that appear in a text impede the progress of computer processing and root word determination tasks while extracting an abstract. As a remedy to this difficulty, a lemmatization algorithm is \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "74", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Harrag:2022:AFN, author = "Fouzi Harrag and Mohamed Khalil Djahli", title = "{Arabic} Fake News Detection: a Fact Checking Based Deep Learning Approach", journal = j-TALLIP, volume = "21", number = "4", pages = "75:1--75:34", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3501401", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3501401", abstract = "Fake news stories can polarize society, particularly during political events. They undermine confidence in the media in general. 
Current NLP systems are still lacking the ability to properly interpret and classify Arabic fake news. Given the high stakes \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "75", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{P:2022:TSS, author = "Jasir M. P. and Kannan Balakrishnan", title = "Text-to-Speech Synthesis: Literature Review with an Emphasis on {Malayalam} Language", journal = j-TALLIP, volume = "21", number = "4", pages = "76:1--76:56", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3501397", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3501397", abstract = "Text-to-Speech Synthesis (TTS) is an active area of research to generate synthetic speech from underlying text. The identified syllables are uttered with proper duration and prosody characteristics to emulate natural speech. It falls under the category of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "76", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Qin:2022:MDS, author = "Libo Qin and Fuxuan Wei and Minheng Ni and Yue Zhang and Wanxiang Che and Yangming Li and Ting Liu", title = "Multi-domain Spoken Language Understanding Using Domain- and Task-aware Parameterization", journal = j-TALLIP, volume = "21", number = "4", pages = "77:1--77:17", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3502198", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3502198", abstract = "Spoken language understanding (SLU) has been addressed as a supervised learning problem, where a set of training data is available for each domain. However, annotating data for a new domain can be both financially costly and non-scalable. One existing \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "77", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Qin:2022:ACE, author = "Yanxia Qin and Zhongqing Wang and Yue Zhang and Kehai Chen and Min Zhang", title = "Advancing {Chinese} Event Detection via Revisiting Character Information", journal = j-TALLIP, volume = "21", number = "4", pages = "78:1--78:9", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3502197", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3502197", abstract = "Recently, character information has been successfully introduced into the encoder-decoder event detection model to relieve the trigger-word mismatch problem, thus achieving impressive results in the languages without natural delimiters (i.e., Chinese). \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "78", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jain:2022:WSD, author = "Goonjan Jain and D. K. Lobiyal", title = "Word Sense Disambiguation using Cooperative Game Theory and Fuzzy {Hindi} {WordNet} based on {ConceptNet}", journal = j-TALLIP, volume = "21", number = "4", pages = "79:1--79:25", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3502739", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3502739", abstract = "Natural Language is fuzzy in nature. The fuzziness of Hindi language was captured in the Fuzzy Hindi WordNet (FHWN). 
FHWN assigned membership values to fuzzy relationships by consulting experts from various domains. However, these membership values need \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "79", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Manerkar:2022:KWC, author = "Sanjana Manerkar and Kavita Asnani and Preeti Ravindranath Khorjuvenkar and Shilpa Desai and Jyoti D. Pawar", title = "{Konkani WordNet}: Corpus-Based Enhancement using Crowdsourcing", journal = j-TALLIP, volume = "21", number = "4", pages = "80:1--80:18", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3503156", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3503156", abstract = "Konkani is one of the languages included in the eighth schedule of the Indian constitution. It is the official language of Goa and is spoken mainly in Goa and some places in Karnataka and Kerala. Konkani WordNet or Konkani Shabdamalem (ko{\d{m}}ka{\d{n}}{\=\i} {\'s}abdam{\=a}{\d{l}}e{\d{m}}) \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "80", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2022:MMR, author = "Junyi Chen and Lan Du and Ming Liu and Xiabing Zhou", title = "{Mulan}: a Multiple Residual Article-Wise Attention Network for Legal Judgment Prediction", journal = j-TALLIP, volume = "21", number = "4", pages = "81:1--81:15", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3503157", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3503157", abstract = "Legal judgment prediction (LJP) is used to predict judgment results based on the description of individual legal cases. In order to be more suitable for actual application scenarios in which the case has cited multiple articles and has multiple charges, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "81", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Guo:2022:HNT, author = "H. Guo and N. Dong and J. Y. Zhao and Y. F. Liu", title = "Handwritten {New Tai Lue} Character Recognition Using Convolutional Prior Features and Deep Variationally Sparse {Gaussian} Process Modeling", journal = j-TALLIP, volume = "21", number = "4", pages = "82:1--82:25", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3506700", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3506700", abstract = "New Tai Lue is widely used in Southwest China and Southeast Asia.
Hence, it is important to study related handwritten character recognition. Considering the many similar characters in handwritten New Tai Lue, this paper proposes an offline handwritten New \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "82", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mahajan:2022:WLS, author = "Shilpa Mahajan and Rajneesh Rani", title = "Word Level Script Identification Using Convolutional Neural Network Enhancement for Scenic Images", journal = j-TALLIP, volume = "21", number = "4", pages = "83:1--83:29", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3506699", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3506699", abstract = "Script identification from complex and colorful images is an integral part of the text recognition and classification system. Such images may contain twofold challenges: (1) Challenges related to the camera like blurring effect, non-uniform illumination \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "83", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Alshammari:2022:CNS, author = "Nasser O. Alshammari and Fawaz D. 
Alharbi", title = "Combining a Novel Scoring Approach with {Arabic} Stemming Techniques for {Arabic} Chatbots Conversation Engine", journal = j-TALLIP, volume = "21", number = "4", pages = "84:1--84:21", month = jul, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511215", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed May 18 08:42:14 MDT 2022", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3511215", abstract = "Arabic is recognized as one of the main languages around the world. Many attempts and efforts have been done to provide computing solutions to support the language. Developing Arabic chatbots is still an evolving research field and requires extra efforts \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "84", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Shang:2022:IHD, author = "Rui Shang and Xia Li", title = "Improved Heuristic Data Management and Protection Algorithm for Digital {China} Cultural Datasets", journal = j-TALLIP, volume = "21", number = "5", pages = "85:1--85:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3394114", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3394114", abstract = "In the present scenario sustainable management and protection of digital cultural datasets are considered as a significant area of research. In the recent past, the protection and management of cultural data are facing several new challenges and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "85", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Canhasi:2022:AFN, author = "Ercan Canhasi and Rexhep Shijaku and Erblin Berisha", title = "{Albanian} Fake News Detection", journal = j-TALLIP, volume = "21", number = "5", pages = "86:1--86:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3487288", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3487288", abstract = "Recent years have witnessed the vast increase of the phenomenon known as the fake news. Among the main reasons for this increase are the continuous growth of internet and social media usage and the real-time information dissemination opportunity offered \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "86", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ahmed:2022:FCS, author = "Usman Ahmed and Jerry Chun-Wei Lin and Gautam Srivastava", title = "Fuzzy Contrast Set Based Deep Attention Network for Lexical Analysis and Mental Health Treatment", journal = j-TALLIP, volume = "21", number = "5", pages = "87:1--87:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3506701", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3506701", abstract = "Internet-delivered psychological treatments (IDPT) consider mental problems based on Internet interaction. 
With such increased interaction because of the COVID-19 pandemic, more online tools have been widely used to provide evidence-based mental health \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "87", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Fadte:2022:AAV, author = "Swapnil Fadte and Edna Vaz Fernandes and Ramdas Karmali and Jyoti D. Pawar", title = "Acoustic Analysis of Vowels in {Konkani}", journal = j-TALLIP, volume = "21", number = "5", pages = "88:1--88:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3474358", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3474358", abstract = "Konkani is an under-resourced language mainly spoken on the west coast of India. Although linguistic analyses of vowel sounds in various dialects of Konkani have been done in the past, more accurate analysis of Konkani vowels, especially an acoustic- \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "88", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Otoom:2022:NSI, author = "Mwaffaq Otoom and Mohammad A.
Alzubaidi and Lina Nasr Abd Al-Raziq", title = "A Novel Social Interaction Assistive Device for {Arab} Deaf People", journal = j-TALLIP, volume = "21", number = "5", pages = "89:1--89:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3508374", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3508374", abstract = "Many deaf people worldwide face problems with integrating into society and interacting with people who do not understand sign language. This can lead to isolation and difficulty in expressing feelings. In this research, our primary goal is to help deaf \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "89", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Pathak:2022:RAI, author = "Dhrubajyoti Pathak and Sukumar Nandi and Priyankoo Sarmah", title = "Reduplication in {Assamese}: Identification and Modeling", journal = j-TALLIP, volume = "21", number = "5", pages = "90:1--90:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510419", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3510419", abstract = "Reduplication is a productive morphological process widely used in a substantial number of languages in the world. Reduplication is a well-studied phenomenon, and several typological works have provided evidence for different types of reduplication in \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "90", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Su:2022:INM, author = "Chao Su and Heyan Huang and Shumin Shi and Ping Jian", title = "Improving Neural Machine Translation by Transferring Knowledge from Syntactic Constituent Alignment Learning", journal = j-TALLIP, volume = "21", number = "5", pages = "91:1--91:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510580", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3510580", abstract = "Statistical machine translation (SMT) models rely on word-, phrase-, and syntax-level alignments. But neural machine translation (NMT) models rarely explicitly learn the phrase- and syntax-level alignments. In this article, we propose to improve NMT by \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "91", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mukhtar:2022:IUA, author = "Neelam Mukhtar and Mohammad Abid Khan and Nadia Chiragh and Shah Nazir and Asim Ullah Jan", title = "An Intelligent Unsupervised Approach for Handling Context-Dependent Words in {Urdu} Sentiment Analysis", journal = j-TALLIP, volume = "21", number = "5", pages = "92:1--92:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3510830", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3510830", abstract = "The characteristic of context dependency in Urdu words needs to be handled carefully while performing Urdu sentiment analysis. In this research, an already constructed Urdu sentiment lexicon of positive and negative words is further expanded by the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "92", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nasim:2022:ALC, author = "Zarmeen Nasim and Sajjad Haider", title = "Automatic Labeling of Clusters for a Low-Resource {Urdu} Language", journal = j-TALLIP, volume = "21", number = "5", pages = "93:1--93:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511097", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3511097", abstract = "Document clustering techniques often produce clusters that require human intervention to interpret the meaning of such clusters. Automatic cluster labeling refers to the process of assigning a meaningful phrase to a cluster as a label. This article \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "93", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2022:EMC, author = "Songming Zhang and Ying Zhang and Yufeng Chen and Du Wu and Jinan Xu and Jian Liu", title = "Exploiting Morpheme and Cross-lingual Knowledge to Enhance {Mongolian} Named Entity Recognition", journal = j-TALLIP, volume = "21", number = "5", pages = "94:1--94:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511098", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3511098", abstract = "Mongolian named entity recognition (NER) is not only one of the most crucial and fundamental tasks in Mongolian natural language processing, but also an important step to improve the performance of downstream tasks such as information retrieval, machine \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "94", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2022:CBJ, author = "Yan Li and Xiaomin Li and Yiru Wang and Hui Lv and Fenfang Li and La Duo", title = "Character-based Joint Word Segmentation and Part-of-Speech Tagging for {Tibetan} Based on Deep Learning", journal = j-TALLIP, volume = "21", number = "5", pages = "95:1--95:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511600", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3511600", abstract = "Tibetan word segmentation and POS tagging are the primary tasks of Tibetan natural language processing. Most of existing methods of Tibetan word segmentation and POS tagging are based on rules and statistics, which need manual construction of features. In \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "95", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Hu:2022:CAI, author = "Gangqiang Hu and Shengfei Lyu and Xingyu Wu and Jinlong Li and Huanhuan Chen", title = "Contextual-Aware Information Extractor with Adaptive Objective for {Chinese} Medical Dialogues", journal = j-TALLIP, volume = "21", number = "5", pages = "96:1--96:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511602", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3511602", abstract = "Electronic Medical Records (EMRs) are the foundation of modern medical information systems. Despite the benefits of EMRs, the exhausting process of constructing EMRs decreases the efficiency of medical consultation. Therefore, it becomes an emerging \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "96", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Rafi-Ur-Rashid:2022:BCC, author = "Md. Rafi-Ur-Rashid and Mahim Mahbub and Muhammad Abdullah Adnan", title = "Breaking the Curse of Class Imbalance: {Bangla} Text Classification", journal = j-TALLIP, volume = "21", number = "5", pages = "97:1--97:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511601", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3511601", abstract = "This article addresses the class imbalance issue in a low-resource language called Bengali. 
As a use-case, we choose one of the most fundamental NLP tasks, i.e., text classification, where we utilize three benchmark text corpora: fake-news dataset, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "97", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chaudhury:2022:DOD, author = "Ayan Chaudhury and Partha Sarathi Mukherjee and Sudip Das and Chandan Biswas and Ujjwal Bhattacharya", title = "A Deep {OCR} for Degraded {Bangla} Documents", journal = j-TALLIP, volume = "21", number = "5", pages = "98:1--98:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511807", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3511807", abstract = "Despite the significant success of document image analysis techniques, efficient Optical Character Recognition (OCR) of degraded document images still remains an open problem. Although a body of work has been reported on degraded document recognition for \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "98", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ahmadi:2022:LMN, author = "Sina Ahmadi and Hossein Hassani and Daban Q. 
Jaff", title = "Leveraging Multilingual News {Websites} for Building a {Kurdish} Parallel Corpus", journal = j-TALLIP, volume = "21", number = "5", pages = "99:1--99:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511806", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3511806", abstract = "Machine translation has been a major motivation of development in natural language processing. Despite the burgeoning achievements in creating more efficient machine translation systems, thanks to deep learning methods, parallel corpora have remained \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "99", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kaur:2022:IFE, author = "Kamaldeep Kaur and Parminder Singh", title = "Impact of Feature Extraction and Feature Selection Algorithms on {Punjabi} Speech Emotion Recognition Using Convolutional Neural Network", journal = j-TALLIP, volume = "21", number = "5", pages = "100:1--100:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3511888", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3511888", abstract = "As a challenge to refine the spontaneity and productivity of a machine and human coherence, speech emotion recognition has been an overriding area of research. The trustability and fulfillment of emotion recognition are largely involved with the feature \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "100", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumar:2022:DBD, author = "Krishan Kumar", title = "{DEAF-BSL}: {Deep lEArning Framework for British Sign Language} recognition", journal = j-TALLIP, volume = "21", number = "5", pages = "101:1--101:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3513004", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3513004", abstract = "The recent development of disability studies in academic bodies has expedited the promotion of investigation on disability. With computer-aided tools, communication between the impaired person and someone who does not understand sign language could be \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "101", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Verma:2022:CAE, author = "Vikas Verma and S. K. Sharma", title = "Critical Analysis of Existing {Punjabi} Grammar Checker and a Proposed Hybrid Framework Involving Machine Learning and Rule-Base Criteria", journal = j-TALLIP, volume = "21", number = "5", pages = "102:1--102:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3514237", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3514237", abstract = "An important area of research involving Artificial Intelligence (AI) is Natural Language Processing (NLP). 
The objective of training a machine is to imitate and manipulate text and speech of humans. Progressive research is undertaken to find connections \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "102", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Shi:2022:LRN, author = "Shumin Shi and Xing Wu and Rihai Su and Heyan Huang", title = "Low-resource Neural Machine Translation: Methods and Trends", journal = j-TALLIP, volume = "21", number = "5", pages = "103:1--103:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3524300", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3524300", abstract = "Neural Machine Translation (NMT) brings promising improvements in translation quality, but until recently, these models rely on large-scale parallel corpora. As such corpora only exist on a handful of language pairs, the translation performance is far \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "103", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mamta:2022:EML, author = "Mamta and Asif Ekbal and Pushpak Bhattacharyya", title = "Exploring Multi-lingual, Multi-task, and Adversarial Learning for Low-resource Sentiment Analysis", journal = j-TALLIP, volume = "21", number = "5", pages = "104:1--104:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3514498", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3514498", abstract = "Deep learning has become most prominent in solving various Natural Language Processing (NLP) tasks including sentiment analysis. However, these techniques require a considerably large amount of annotated corpus, which is not easy to obtain for most of the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "104", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nguyen:2022:NVC, author = "Kiet Van Nguyen and Tin Van Huynh and Duc-Vu Nguyen and Anh Gia-Tuan Nguyen and Ngan Luu-Thuy Nguyen", title = "New {Vietnamese} Corpus for Machine Reading Comprehension of Health News Articles", journal = j-TALLIP, volume = "21", number = "5", pages = "105:1--105:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3527631", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3527631", abstract = "Machine reading comprehension is a natural language understanding task where the computing system is required to read a text and then find the answer to a specific question posed by a human. Large-scale and high-quality corpora are necessary for \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "105", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mehra:2022:BMB, author = "Pramod Mehra and Shashi Kant Verma", title = "{BERIS}: an {mBERT}-based Emotion Recognition Algorithm from {Indian} Speech", journal = j-TALLIP, volume = "21", number = "5", pages = "106:1--106:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3517195", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3517195", abstract = "Emotions, the building blocks of the human intellect, play a vital role in Artificial Intelligence (AI). 
For a robust AI-based machine, it is important that the machine understands human emotions. COVID-19 has introduced the world to no-touch intelligent \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "106", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumar:2022:NTM, author = "Amit Kumar and Nazanin Esmaili and Massimo Piccardi", title = "Neural Topic Model Training with the {REBAR} Gradient Estimator", journal = j-TALLIP, volume = "21", number = "5", pages = "107:1--107:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3517336", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3517336", abstract = "Topic modelling is an important approach of unsupervised machine learning that allows automatically extracting the main ``topics'' from large collections of documents. In addition, topic modelling is able to identify the topic proportions of each individual \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "107", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ren:2022:DUB, author = "Feiliang Ren and Yongkang Liu and Bochao Li and Zhibo Wang and Yu Guo and Shilei Liu and Huimin Wu and Jiaqi Wang and Chunchao Liu and Bingchao Wang", title = "Deep Understanding Based Multi-Document Machine Reading Comprehension", journal = j-TALLIP, volume = "21", number = "5", pages = "108:1--108:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3519296", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3519296", abstract = "Most existing multi-document machine reading comprehension models mainly focus on understanding the interactions between the input question and documents, but ignore the following two kinds of understandings. First, to understand the semantic meaning of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "108", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Maity:2022:TSA, author = "Shuverthi Maity and Kamal Sarkar", title = "Topic Sentiment Analysis for {Twitter} Data in {Indian} Languages Using Composite Kernel {SVM} and Deep Learning", journal = j-TALLIP, volume = "21", number = "5", pages = "109:1--109:??", month = sep, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3519297", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 17 07:33:39 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3519297", abstract = "Sentiment analysis of public opinions on social networks, such as Twitter or Facebook, can provide us with valuable information, which has a wide range of applications. But the efficiency and accuracy of the automated methods for Twitter sentiment \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "109", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } %%% [31-Mar-2023] TO DO: v21n6 had not yet been published on this date. NOTE: the v21n6 entries below were added later (bibdate 1 Jul 2023), but article numbers 110, 111, and 113 are absent from the sequence (109, 112, 114, ...); verify them against the publisher's table of contents. @Article{Yinying:2022:DMT, author = "Cai Yinying and Juan Li and Bo Wang", title = "Data Mining Techniques and Machine Learning Algorithms in the Multimedia System to Enhance Engineering Education", journal = j-TALLIP, volume = "21", number = "6", pages = "112:1--112:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3517805", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3517805", abstract = "In the current digital era, engineering education worldwide faces a massive challenge in education and career development. By authorizing educators and administrators to migrate to the actions, cloud services technology has transformed into the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "112", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Xu:2022:EMA, author = "Bo Xu and Jing Jiang", title = "Exploitation for Multimedia {Asian} Information Processing and Artificial Intelligence-based Art Design and Teaching in Colleges", journal = j-TALLIP, volume = "21", number = "6", pages = "114:1--114:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3526219", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3526219", abstract = "Artificial intelligence has been widely used in art education and learning due to its quick progress. Any creation made with the help of artificial intelligence is referred to as art design. It covers works generated independently by AI systems and works \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "114", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2022:CMC, author = "Zhiqin Li", title = "Construction of Marketing Curriculum System Based on Blending Learning ``$ 3 + 2 $'' Joint Training of Higher Vocational and Undergraduate Education Using {NLP} for Marketing Document Management and Information Retrieval", journal = j-TALLIP, volume = "21", number = "6", pages = "115:1--115:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3524113", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3524113", abstract = "The blended learning system provides educational tools to a student in accordance with the student's expressed educational interests, and it is a mix of online training and assignments, giving them more control over the learning and other developmental \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "115", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2022:RIA, author = "Changzhen Li and Xin Zhao", title = "Research on the Influence of Artificial Intelligence Technology with {Web 3.0} on Accounting Education and Its Countermeasures", journal = j-TALLIP, volume = "21", number = "6", pages = "116:1--116:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3527666", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3527666", abstract = "With the continuous development of artificial intelligence (AI) technology, AI technology has been widely used in various fields of social and economic life, bringing great impact to various industries. AI is a section of computer science, a new science \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "116", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2022:ESI, author = "Zhaojun Chen and Yanhai Bao and Tongxun Zhu", title = "An Empirical Study on {IPO} Model Construction of Undergraduate Education Quality Evaluation in {China} from the Statistical Pattern Recognition Approach In {NLP}", journal = j-TALLIP, volume = "21", number = "6", pages = "117:1--117:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3543851", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3543851", abstract = "Based on the analysis of 1,497 samples from the survey of national undergraduate educational administrators, the IPO model of undergraduate education quality evaluation from the perspective of managers can be effectively verified. The quality of higher \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "117", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ding:2022:RTR, author = "Yanhong Ding and Na Yang", title = "Research on Teaching Reform of Innovative and Entrepreneurial Talents Training Course in Local Undergraduate Universities Based on Intelligent Blended Learning", journal = j-TALLIP, volume = "21", number = "6", pages = "119:1--119:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3529393", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3529393", abstract = "In order to make college development better accord with the development demand of regional society, the innovation and entrepreneurship (IE) training course reform in local undergraduate colleges is studied. To begin with, the research background as well \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "119", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jiang:2022:RBL, author = "Wenxia Jiang and Qingna Lin and Wei Qin", title = "Research on Blended Learning Evaluation Method of Overseas {Chinese} Education in the Post-epidemic Era", journal = j-TALLIP, volume = "21", number = "6", pages = "120:1--120:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3542926", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3542926", abstract = "The spread of COVID-19 in the world has changed the way of life, economy, society, learning, and work around the world. Under the background of normalization of epidemic prevention and control, how to carry out overseas Chinese education and how to affect \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "120", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhong:2022:CSS, author = "Deren Zhong", title = "A Case Study for Sports Characteristic Town: Enlightenment to the Sport Industry", journal = j-TALLIP, volume = "21", number = "6", pages = "121:1--121:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3517915", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3517915", abstract = "The characteristic sports town is proposed to promote urban and rural integration by implementing a national-level fitness strategy. 
It plays a crucial role in the sports industry's promotion, launch, and supply. The characteristic town for sports creates \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "121", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jiang:2022:SCE, author = "Guolei Jiang", title = "A Study on the Construction of the Evaluation System of the Teaching Ability of Students using Pattern Recognition for Studying Majoring in Badminton in the Mixed Learning Model of Physical Education Majors and Self-Learning System", journal = j-TALLIP, volume = "21", number = "6", pages = "123:1--123:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3527607", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3527607", abstract = "With my country's ongoing education reform and the continuous development of information technology-enabled education methods, the teaching environment and conditions of various universities have greatly improved, and information technology is changing \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "123", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2022:MAC, author = "Yingtao Li", title = "Modeling and Analysis of {Chinese} Culture and Communication", journal = j-TALLIP, volume = "21", number = "6", pages = "124:1--124:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3514238", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3514238", abstract = "Along with many other Asian countries, China's communication differs from, and at times, conflicts with, the United States, which is considered more collectivist and low-contact than that of the United States. The topic of this article is mental, physical, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "124", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhao:2022:FSP, author = "Yun Zhao and Dexi Liu and Changxuan Wan and Xiping Liu and Xiangqing Qiu and Jianyun Nie", title = "Find Supports for the Post about Mental Issues: More Than Semantic Matching", journal = j-TALLIP, volume = "21", number = "6", pages = "126:1--126:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3508373", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3508373", abstract = "Mental-health-oriented question-answering (MH-QA) aims at retrieving an appropriate response for a question post involving mental health issues, which will be used to assist counsellors to reply to the support seeker. This task is different from the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "126", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2022:ANE, author = "Bo Liu and Zhuo Su and Guangzhi Qu", title = "{AIP}: a Named Entity Recognition Method Combining Glyphs and Sounds", journal = j-TALLIP, volume = "21", number = "6", pages = "127:1--127:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3522736", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3522736", abstract = "In recent years, a large number of Chinese electronic texts have been produced in the process of information construction in various fields. 
Identifying specific entities in these electronic texts has become a major research focus. Most existing research \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "127", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Dey:2022:OIS, author = "Spandan Dey and Md Sahidullah and Goutam Saha", title = "An Overview of {Indian} Spoken Language Recognition from Machine Learning Perspective", journal = j-TALLIP, volume = "21", number = "6", pages = "128:1--128:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3523179", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3523179", abstract = "Automatic spoken language identification (LID) is a very important research field in the era of multilingual voice-command-based human-computer interaction. A front-end LID module helps to improve the performance of many speech-based applications in the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "128", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Niraula:2022:LTE, author = "Nobal B. 
Niraula and Saurab Dulal and Diwa Koirala", title = "Linguistic Taboos and Euphemisms in {Nepali}", journal = j-TALLIP, volume = "21", number = "6", pages = "129:1--129:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3524111", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3524111", abstract = "Languages across the world have words, phrases, and behaviors---the taboos---that are avoided in public communication considering them as obscene or disturbing to the social, religious, and ethical values of society. However, people deliberately use these \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "129", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2022:DSC, author = "Zuchao Li and Hai Zhao and Junru Zhou and Kevin Parnow and Shexia He", title = "Dependency and Span, Cross-Style Semantic Role Labeling on {PropBank} and {NomBank}", journal = j-TALLIP, volume = "21", number = "6", pages = "130:1--130:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3526214", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3526214", abstract = "The latest developments in neural semantic role labeling (SRL) have shown great performance improvements with both the dependency and span formalism/styles. Although the two styles share many similarities in linguistic meaning and computation, most \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "130", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2022:OTC, author = "Linqing Chen and Junhui Li and Zhengxian Gong and Min Zhang and Guodong Zhou", title = "One Type Context Is Not Enough: Global Context-aware Neural Machine Translation", journal = j-TALLIP, volume = "21", number = "6", pages = "131:1--131:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3526215", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3526215", abstract = "How to effectively model global context has been a critical challenge for document-level neural machine translation (NMT). Both preceding and global context have been carefully explored in the sequence-to-sequence (seq2seq) framework. However, previous \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "131", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Guan:2022:RGS, author = "Mengyu Guan and Zhongqing Wang and Guodong Zhou", title = "Response Generation via Structure-Aware Constraints", journal = j-TALLIP, volume = "21", number = "6", pages = "132:1--132:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3526216", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3526216", abstract = "End-to-end neural modeling with the encoder-decoder architecture has shown great promise in response generation. 
However, it often generates dull and generic responses due to its failure to effectively perceive various kinds of act, sentiment, and topic \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "132", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wan:2022:CCC, author = "Qizhi Wan and Changxuan Wan and Keli Xiao and Dexi Liu and Qing Liu and Jiangling Deng and Wenkang Luo and Rong Hu", title = "Construction of a {Chinese} Corpus for Multi-Type Economic Event Relation", journal = j-TALLIP, volume = "21", number = "6", pages = "133:1--133:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3527240", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3527240", abstract = "We construct a Chinese Economic Event Treebank (CEETB), focusing on revealing economic and finance events and their relations. Investigating economic event relations will benefit academic research and practice in not just economics but many other \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "133", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2022:SBD, author = "Fenfang Li and Hui Lv and Duo La and Binbin Yong and Qingguo Zhou", title = "Sentence Boundary Disambiguation for {Tibetan} Based on Attention Mechanism at the Syllable Level", journal = j-TALLIP, volume = "21", number = "6", pages = "134:1--134:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3527663", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3527663", abstract = "Tibetan is a low-resource language with few existing electronic reference materials. The goal of Tibetan sentence boundary disambiguation (SBD) is to segment long text into sentences, and it is the foundation for downstream tasks corpora building. This \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "134", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2022:CFO, author = "Qi Chen and Oi Yee Kwong and Yinqiao Li and Tong Xiao and Jingbo Zhu", title = "Coarse-to-Fine Output Predictions for Efficient Decoding in Neural Machine Translation", journal = j-TALLIP, volume = "21", number = "6", pages = "135:1--135:??", month = nov, year = "2022", CODEN = "????", DOI = "https://doi.org/10.1145/3527664", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:41:59 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3527664", abstract = "Neural Machine Translation (NMT) systems are undesirably slow as the decoder often has to compute probability distributions over large target vocabularies. In this work, we propose a coarse-to-fine approach to reduce the complexity of the decoding process, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "135", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Shijun:2023:RUT, author = "Xu Shijun and Yi Li and Zeng Cong and Jin Yong Tang", title = "Research on {UAV} Teaching Application and Technological Innovation with {5G} Technology and Development of High-pole Throwing Hydrangea", journal = j-TALLIP, volume = "22", number = "1", pages = "14:1--14:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3529391", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3529391", abstract = "The technical innovation of high pole throwing Hydrangea is increasing and this study compares with the existing projection technology theory from two \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "14", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2023:UWS, author = "Lihao Wang and Xiaoqing Zheng", title = "Unsupervised Word Segmentation with Bi-directional Neural Language Model", journal = j-TALLIP, volume = "22", number = "1", pages = "17:1--17:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3529387", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3529387", abstract = "We propose an unsupervised word segmentation model, in which for each unlabelled sentence sample, the learning objective is to maximize the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. 
Lang. Inf. Process.", articleno = "17", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Marreddy:2023:RPL, author = "Mounika Marreddy and Subba Reddy Oota and Lakshmi Sireesha Vakada and Venkata Charan Chinni and Radhika Mamidi", title = "Am {I} a Resource-Poor Language? {Data} Sets, Embeddings, Models and Analysis for four different {NLP} Tasks in {Telugu} Language", journal = j-TALLIP, volume = "22", number = "1", pages = "18:1--18:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3531535", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3531535", abstract = "Due to the lack of a large annotated corpus, many resource-poor Indian languages struggle to reap the benefits of recent deep feature representations in Natural \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "18", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Hsueh:2023:TOC, author = "Yu-Ling Hsueh and Tai-Liang Chou", title = "A Task-oriented Chatbot Based on {LSTM} and Reinforcement Learning", journal = j-TALLIP, volume = "22", number = "1", pages = "19:1--19:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3529649", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3529649", abstract = "Thanks to the advancements in deep learning, chatbots are widely used in messaging applications. 
Undoubtedly, a chatbot is a new way of interaction \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "19", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Cho:2023:TIP, author = "Won Ik Cho and Nam Soo Kim", title = "Text Implicates Prosodic Ambiguity: a Corpus for Intention Identification of the {Korean} Spoken Language", journal = j-TALLIP, volume = "22", number = "1", pages = "21:1--21:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3529648", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3529648", abstract = "Phonetic features are indispensable in understanding the spoken language. Especially in Korean, which is wh-in-situ and head-final, the addressee of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "21", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Vo:2023:ITM, author = "Tham Vo", title = "An Integrated Topic Modelling and Graph Neural Network for Improving Cross-lingual Text Classification", journal = j-TALLIP, volume = "22", number = "1", pages = "22:1--22:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3530260", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3530260", abstract = "In recent years, along with the dramatic developments of deep learning in the natural language processing (NLP) domain, notable multilingual pre-trained language \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "22", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Huang:2023:TRA, author = "Zeyu Huang and Wenge Rong and Xiaofeng Zhang and Yuanxin Ouyang and Chenghua Lin and Zhang Xiong", title = "Token Relation Aware {Chinese} Named Entity Recognition", journal = j-TALLIP, volume = "22", number = "1", pages = "24:1--24:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3531534", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3531534", abstract = "Due to the lack of natural delimiters, most Chinese Named Entity Recognition (NER) approaches are character-based and utilize an external lexicon to leverage the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. 
Lang. Inf. Process.", articleno = "24", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Rajalakshmi:2023:SDI, author = "E. Rajalakshmi and R. Elakkiya and Alexey L. Prikhodko and M. G. Grif and Maxim A. Bakaev and Jatinderkumar R. Saini and Ketan Kotecha and V. Subramaniyaswamy", title = "Static and Dynamic Isolated {Indian} and {Russian} Sign Language Recognition with Spatial and Temporal Feature Detection Using Hybrid Neural Network", journal = j-TALLIP, volume = "22", number = "1", pages = "26:1--26:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3530989", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3530989", abstract = "The Sign Language Recognition system intends to recognize the Sign language used by the hearing and vocally impaired populace. The interpretation of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "26", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Al-Barhamtoshy:2023:AMR, author = "Hassanin M. Al-Barhamtoshy and Kamal M. Jambi and Mohsen A. Rashwan and Sherif M. 
Abdou", title = "An {Arabic} Manuscript Regions Detection, Recognition and Its Applications for {OCRing}", journal = j-TALLIP, volume = "22", number = "1", pages = "27:1--27:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532609", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3532609", abstract = "The problem of Region of Interest (RoI) in document layout analysis and document recognition has recently become an essential topic in OCRing systems. Arabic \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "27", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2023:EJC, author = "Zezhong Li and Fuji Ren and Xiao Sun and Degen Huang and Piao Shi", title = "Exploiting {Japanese-Chinese} Cognates with Shared Private Representations for {NMT}", journal = j-TALLIP, volume = "22", number = "1", pages = "28:1--28:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3533429", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3533429", abstract = "Neural machine translation has achieved remarkable progress over the past several years; however, little attention has been paid to machine translation (MT) \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "28", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2023:RBF, author = "Jiusheng Chen and Xingkai Xu and Xiaoyu Zhang", title = "Radial Basis Function Attention for Named Entity Recognition", journal = j-TALLIP, volume = "22", number = "1", pages = "30:1--30:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3539014", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3539014", abstract = "Attention mechanism is an increasingly important approach in the field of natural language processing (NLP). In the attention-based named entity recognition \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "30", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Elghannam:2023:MLA, author = "Fatma Elghannam", title = "Multi-Label Annotation and Classification of {Arabic} Texts Based on Extracted Seed Keyphrases and Bi-Gram Alphabet Feed Forward Neural Networks Model", journal = j-TALLIP, volume = "22", number = "1", pages = "31:1--31:??", month = jan, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3539607", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:45 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3539607", abstract = "In natural language processing, text classification is a fundamental problem. Multi-label classification of textual data is a challenging topic in text \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Asian Low-Resour. Lang. Inf. Process.", articleno = "31", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } %%% [31-Mar-2023] TO DO: v21n6 has not yet been published @Article{Priyadarshi:2023:SPR, author = "Ankur Priyadarshi and Sujan Kumar Saha", title = "A Study on the Performance of Recurrent Neural Network based Models in {Maithili} Part of Speech Tagging", journal = j-TALLIP, volume = "22", number = "2", pages = "32:1--32:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3540260", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3540260", abstract = "This article presents our effort in developing a Maithili Part of Speech (POS) tagger. Substantial effort has been devoted to developing POS taggers in several Indian languages, including Hindi, Bengali, Tamil, Telugu, Kannada, Punjabi, and Marathi; but \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "32", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mishra:2023:OLB, author = "Santosh Kumar Mishra and Harshit and Sriparna Saha and Pushpak Bhattacharyya", title = "An Object Localization-based Dense Image Captioning Framework in {Hindi}", journal = j-TALLIP, volume = "22", number = "2", pages = "33:1--33:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3558391", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3558391", abstract = "Dense image captioning is a task that requires generating localized captions in natural language for multiple regions of an image. This task leverages its functionalities from both computer vision for recognizing regions in an image and natural language \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "33", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yang:2023:MTM, author = "Hang Yang and Yubo Chen and Kang Liu and Jun Zhao and Zuyu Zhao and Weijian Sun", title = "Multi-Turn and Multi-Granularity Reader for Document-Level Event Extraction", journal = j-TALLIP, volume = "22", number = "2", pages = "34:1--34:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3542925", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3542925", abstract = "Most existing event extraction works mainly focus on extracting events from one sentence. 
However, in real-world applications, arguments of one event may scatter across sentences and multiple events may co-occur in one document. Thus, these scenarios \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "34", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Eiselen:2023:INE, author = "Roald Eiselen and Andiswa Bukula", title = "{IsiXhosa} Named Entity Recognition Resources", journal = j-TALLIP, volume = "22", number = "2", pages = "35:1--35:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3531478", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3531478", abstract = "Named entity recognition has been one of the most widely researched natural language processing technologies over the past two decades. For the South African languages, however, relatively little research and development work has been done. This changed \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "35", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Rai:2023:APB, author = "Pooja Rai and Sanjay Chatterji", title = "Annotation Projection-based Dependency Parser Development for {Nepali}", journal = j-TALLIP, volume = "22", number = "2", pages = "36:1--36:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3542696", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3542696", abstract = "Building computational resources and tools for the under-resourced languages is strenuous for any Natural Language Processing task. This article presents the first dependency parser for an under-resourced Indian language, Nepali. A prerequisite for \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "36", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Demir:2023:TDT, author = "Seniz Demir", title = "{Turkish} Data-to-Text Generation Using Sequence-to-Sequence Neural Networks", journal = j-TALLIP, volume = "22", number = "2", pages = "37:1--37:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3543826", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3543826", abstract = "End-to-end data-driven approaches lead to rapid development of language generation and dialogue systems. 
Despite the need for large amounts of well-organized data, these approaches jointly learn multiple components of the traditional generation pipeline \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "37", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Goyal:2023:FBT, author = "Kapil Dev Goyal and Muhammad Raihan Abbas and Vishal Goyal and Yasir Saleem", title = "Forward-backward Transliteration of {Punjabi Gurmukhi} Script Using {$N$}-gram Language Model", journal = j-TALLIP, volume = "22", number = "2", pages = "38:1--38:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3542924", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3542924", abstract = "Transliterating the text of a language to a foreign script is called forward transliteration and transliterating the text back to the original script is called backward transliteration. In this work, we perform both forward as well as backward \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "38", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Gomaa:2023:LAA, author = "Walid Gomaa", title = "Lyrics Analysis of the {Arab} Singer {Abdel ElHalim Hafez}", journal = j-TALLIP, volume = "22", number = "2", pages = "39:1--39:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3544100", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3544100", abstract = "In this work we analyze the lyrics of one of the most famous and influential Arab artists in the twentieth century, namely, (Abdel ElHalim Hafez). Lyrics analysis provides a deep insight into the artist's career evolution and his interactions with the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "39", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sun:2023:SDM, author = "Qingying Sun and Xuefeng Xi and Jiajun Sun and Zhongqing Wang and Huiyan Xu", title = "Stance Detection with a Multi-Target Adversarial Attention Network", journal = j-TALLIP, volume = "22", number = "2", pages = "40:1--40:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3544490", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3544490", abstract = "Stance detection aims to assign a stance label (in favor or against) to a post towards a specific target. 
In the literature, there are many studies focusing on this topic, and most of them treat stance detection as a supervised learning task. Therefore, a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "40", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Alzubaidi:2023:NAG, author = "Mohammad A. Alzubaidi and Mwaffaq Otoom and Areen M. Abu Rwaq", title = "A Novel Assistive Glove to Convert {Arabic} Sign Language into Speech", journal = j-TALLIP, volume = "22", number = "2", pages = "41:1--41:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3545113", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3545113", abstract = "People with speech disorders often communicate through special gestures and sign language gestures. However, other people around them might not understand the meaning of those gestures. The research described in this article is aimed at providing an \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "41", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Do:2023:TOK, author = "Heejin Do and Gary Geunbae Lee", title = "Target-Oriented Knowledge Distillation with Language-Family-Based Grouping for Multilingual {NMT}", journal = j-TALLIP, volume = "22", number = "2", pages = "42:1--42:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3546067", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3546067", abstract = "Multilingual NMT has developed rapidly, but still has performance degradation caused by language diversity and model capacity constraints. To achieve the competitive accuracy of multilingual translation despite such limitations, knowledge distillation, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "42", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ren:2023:UOR, author = "Feiliang Ren and Yongkang Liu and Bochao Li and Shilei Liu and Bingchao Wang and Jiaqi Wang and Chunchao Liu and Qi Ma", title = "An Understanding-oriented Robust Machine Reading Comprehension Model", journal = j-TALLIP, volume = "22", number = "2", pages = "43:1--43:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3546190", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3546190", abstract = "Although existing machine reading comprehension models are making rapid progress on many datasets, they are far from robust. In this article, we propose an understanding-oriented machine reading comprehension model to address three kinds of robustness \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "43", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ramdinmawii:2023:PAC, author = "Esther Ramdinmawii and Sanghamitra Nath", title = "A Preliminary Analysis on the Correlates of Stress and Tones in {Mizo}", journal = j-TALLIP, volume = "22", number = "2", pages = "44:1--44:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3546950", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3546950", abstract = "Stress is the property of a language to exhibit prominence or distinction in one or more syllables in a given domain. The existence of word stress has not been suitably explored in previous acoustic studies of the Mizo language, which is a tonal language \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "44", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bai:2023:BSO, author = "Guirong Bai and Shizhu He and Kang Liu and Jun Zhao", title = "Bidirectional Sentence Ordering with Interactive Decoding", journal = j-TALLIP, volume = "22", number = "2", pages = "45:1--45:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3561510", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3561510", abstract = "Sentence ordering aims at restoring orders of shuffled sentences in a paragraph. Previous methods usually predict orders in a single direction, i.e., from head to tail.
However, unidirectional prediction inevitably causes error accumulation, which \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "45", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Roy:2023:MEC, author = "Aniruddha Roy and Isha Sharma and Sudeshna Sarkar and Pawan Goyal", title = "{Meta-ED}: Cross-lingual Event Detection Using Meta-learning for {Indian} Languages", journal = j-TALLIP, volume = "22", number = "2", pages = "46:1--46:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3555340", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3555340", abstract = "Lack of annotated data is a major concern in Event Detection (ED) tasks for low-resource languages. Cross-lingual ED seeks to address this issue by transferring information across various languages to improve overall performance. In this article, we \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "46", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lahoti:2023:SNR, author = "Pawan Lahoti and Namita Mittal and Girdhari Singh", title = "A Survey on {NLP} Resources, Tools, and Techniques for {Marathi} Language Processing", journal = j-TALLIP, volume = "22", number = "2", pages = "47:1--47:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3548457", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3548457", abstract = "Natural Language Processing (NLP) has been in practice for the past couple of decades, and extensive work has been done for the Western languages, particularly the English language. The Eastern counterpart, especially the languages of the Indian \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "47", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ilyas:2023:EDC, author = "Abdullah Ilyas and Khurram Shahzad and Muhammad Kamran Malik", title = "Emotion Detection in Code-Mixed {Roman Urdu--English} Text", journal = j-TALLIP, volume = "22", number = "2", pages = "48:1--48:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3552515", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3552515", abstract = "Emotion detection is a widely studied topic in natural language processing due to its significance in a number of application areas. 
A plethora of studies have been conducted on emotion detection in European as well as Asian languages. However, a large \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "48", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2023:CLB, author = "Maofu Liu and Junyi Xiang and Xu Xia and Huijun Hu", title = "Contrastive Learning between Classical and Modern {Chinese} for Classical {Chinese} Machine Reading Comprehension", journal = j-TALLIP, volume = "22", number = "2", pages = "49:1--49:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3551637", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3551637", abstract = "By leveraging self-supervised tasks, pre-trained language model (PLM) has made significant progress in the field of machine reading comprehension (MRC). However, in classical Chinese MRC (CCMRC), the passage is typically in classical style, but the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "49", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nag:2023:TLL, author = "Arijit Nag and Bidisha Samanta and Animesh Mukherjee and Niloy Ganguly and Soumen Chakrabarti", title = "Transfer Learning for Low-Resource Multilingual Relation Classification", journal = j-TALLIP, volume = "22", number = "2", pages = "50:1--50:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3554734", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3554734", abstract = "Relation classification (sometimes called relation extraction) requires trustworthy datasets for fine-tuning large language models, as well as for evaluation. Data collection is challenging for Indian languages, because they are syntactically and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "50", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{DSilva:2023:ISM, author = "Jovi D'Silva and Uzzal Sharma", title = "Impact of Similarity Measures in Graph-based Automatic Text Summarization of {Konkani} Texts", journal = j-TALLIP, volume = "22", number = "2", pages = "51:1--51:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3554943", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/pagerank.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3554943", abstract = "Automatic text summarization is a popular area in Natural Language Processing and Machine Learning. In this work, we adopt a graph-based text summarization approach, using PageRank algorithm, for automatically summarizing Konkani text documents. Konkani, an Indo--Aryan language spoken primarily in the state of Goa, which is on the west coast of India. It is a low-resource language with limited language processing tools. Such tools are readily available in other popular languages of choice for automatic text summarization, like English. The Konkani language dataset used for this purpose is based on Konkani folktales. We examine the impact of various language-independent and language-dependent similarity measures on the construction of the graph. The language-dependent similarity measures use pre-trained fastText word embeddings. A fully connected undirected graph is constructed for each document with the sentences represented as the graph's vertices. The vertices are connected to each other based on how strongly they are related to one another. Thereafter, PageRank algorithm is used for ranking the scores of the vertices. The top-ranking sentences are used to generate the summary. 
ROUGE toolkit was used for evaluating the quality of these system-generated summaries, and the performance was evaluated against human generated ``gold-standard'' abstracts and also compared with baselines and benchmark systems. The experimental results show that language-independent similarity measures performed well compared to language-dependent similarity measures despite not using language-specific tools, such as stop-words list, stemming, and word embeddings.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "51", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chiba:2023:AVE, author = "Yuya Chiba and Ryuichiro Higashinaka", title = "Analyzing Variations of Everyday {Japanese} Conversations Based on Semantic Labels of Functional Expressions", journal = j-TALLIP, volume = "22", number = "2", pages = "52:1--52:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3552310", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3552310", abstract = "To achieve effective dialogue processing, the kinds of daily conversations people have must be clarified. Unfortunately, the characteristics of everyday conversations remain insufficiently investigated. In recent years, the Corpus of Everyday Japanese \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "52", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2023:GSG, author = "Mengli Zhang and Gang Zhou and Wanting Yu and Ningbo Huang and Wenfen Liu", title = "{GA-SCS}: Graph-Augmented Source Code Summarization", journal = j-TALLIP, volume = "22", number = "2", pages = "53:1--53:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3554820", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3554820", abstract = "Automatic source code summarization system aims to generate a valuable natural language description for a program, which can facilitate software development and maintenance, code categorization, and retrieval. However, previous sequence-based research did \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "53", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhou:2023:SEP, author = "Bo Zhou and Yubo Chen and Kang Liu and Jun Zhao", title = "Script Event Prediction via Multilingual Event Graph Networks", journal = j-TALLIP, volume = "22", number = "2", pages = "54:1--54:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3557893", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3557893", abstract = "Predicting what happens next in text plays a critical role in building NLP applications. 
Many methods including count-based and neural-network-based have been proposed to tackle the task called script event prediction: predicting the most suitable \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "54", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Guven:2023:CLM, author = "Zekeriya Anil Guven", title = "The Comparison of Language Models with a Novel Text Filtering Approach for {Turkish} Sentiment Analysis", journal = j-TALLIP, volume = "22", number = "2", pages = "55:1--55:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3557892", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3557892", abstract = "Today, comments can be made on many topics on web platforms with the development of the internet. Analyzing the data of these comments is essential for companies and data scientists. There are many methods for analyzing data. Recently, language models \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "55", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lei:2023:LPF, author = "Lei Lei and Guoshun Yuan and Tianle Zhang and Hongjiang Yu", title = "Low-Power Feature-Attention {Chinese} Keyword Spotting Framework with Distillation Learning", journal = j-TALLIP, volume = "22", number = "2", pages = "56:1--56:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3558002", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3558002", abstract = "In this paper, we propose a novel Low-Power Feature-Attention Chinese Keyword Spotting Framework based on a depthwise separable convolution neural network (DSCNN) with distillation learning to recognize speech signals of Chinese wake-up words. The \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "56", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bidgoly:2023:CBS, author = "Amir Jalaly Bidgoly and Hossein Amirkhani and Razieh Baradaran", title = "Clustering-based Sequence to Sequence Model for Generative Question Answering in a Low-resource Language", journal = j-TALLIP, volume = "22", number = "2", pages = "57:1--57:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3563036", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3563036", abstract = "Despite the impressive success of sequence to sequence models for generative question answering, they need a vast amount of question-answer pairs during training, which is hard and expensive to obtain, especially for low-resource languages. In this \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "57", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Komiya:2023:CWE, author = "Kanako Komiya and Shinji Kono and Takumi Seito and Teruo Hirabayashi", title = "Composing Word Embeddings for Compound Words Using Linguistic Knowledge", journal = j-TALLIP, volume = "22", number = "2", pages = "58:1--58:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3561299", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3561299", abstract = "In recent years, the use of distributed representations has been a fundamental technology for natural language processing. However, Japanese has multiple compound words, and often we must compare the meanings of a word and a compound word. Moreover, word \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "58", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nambiar:2023:AST, author = "Sindhya K. Nambiar and David Peter S. 
and Sumam Mary Idicula", title = "Abstractive Summarization of Text Document in {Malayalam} Language: Enhancing Attention Model Using {POS} Tagging Feature", journal = j-TALLIP, volume = "22", number = "2", pages = "59:1--59:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3561819", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3561819", abstract = "Over the past few years, researchers are showing huge interest in sentiment analysis and summarization of documents. The primary reason being that huge volumes of information are available in textual format, and this data has proven helpful for real-world \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "59", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{P:2023:IEF, author = "Jasir M. P. and Kannan Balakrishnan", title = "Identification and Extraction of Features from {Malayalam} Poems for Analyzing Syllable Duration Patterns", journal = j-TALLIP, volume = "22", number = "2", pages = "60:1--60:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3561298", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3561298", abstract = "Text-to-speech (TTS) synthesis is an active area of research to generate synthetic speech from the underlying text. Compared to English and many European languages, TTS is yet to mature in Malayalam, the principal language of the South Indian state of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. 
Lang. Inf. Process.", articleno = "60", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{FHA:2023:DEM, author = "F. H. A. Shibly and Uzzal Sharma and H. M. M. Naleer", title = "Development of an Efficient Method to Detect Mixed Social Media Data with {Tamil--English} Code Using Machine Learning Techniques", journal = j-TALLIP, volume = "22", number = "2", pages = "61:1--61:??", month = feb, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3563775", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Mar 31 09:33:46 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3563775", abstract = "On social networking sites, online hate speech has become more prevalent due to the quick expansion of mobile computing and Web technology. Previous research has found that being exposed to Internet hate speech has substantial offline implications for \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "61", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lin:2023:ISI, author = "Jerry Chun-Wei Lin and Vicente Garc{\'i}a D{\'i}az and Juan Antonio Morente Molinera", title = "Introduction to the Special Issue of Recent Advances in Computational Linguistics for {Asian} Languages", journal = j-TALLIP, volume = "22", number = "3", pages = "62:1--62:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3588316", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3588316", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Asian Low-Resour. Lang. Inf. Process.", articleno = "62", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2023:ELE, author = "Shuai Liu and Tenghui He and Jingyi Li and Yating Li and Akshi Kumar", title = "An Effective Learning Evaluation Method Based on Text Data with Real-time Attribution --- a Case Study for Mathematical Class with Students of Junior Middle School in {China}", journal = j-TALLIP, volume = "22", number = "3", pages = "63:1--63:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3474367", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3474367", abstract = "In today's intelligent age, the vigorous development of education-based information analysis technology has had a profound impact on the education and teaching process. The use of computational linguistics technology to extract teaching data for learning \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "63", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhu:2023:UPS, author = "Shaolin Zhu and Chenggang Mi and Tianqi Li and Yong Yang and Chun Xu", title = "Unsupervised Parallel Sentences of Machine Translation for {Asian} Language Pairs", journal = j-TALLIP, volume = "22", number = "3", pages = "64:1--64:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3486677", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3486677", abstract = "Parallel sentence pairs play a very important role in many natural language processing tasks, especially cross-lingual tasks such as machine translation. So far, many Asian language pairs lack bilingual parallel sentences. As collecting bilingual parallel \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "64", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Fang:2023:ECE, author = "Hui Fang and Hongmei Shi and Jiuzhou Zhang and Marimuthu Karuppiah", title = "Effective College {English} Teaching Based on Teacher--Student Interactive Model", journal = j-TALLIP, volume = "22", number = "3", pages = "65:1--65:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3486676", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3486676", abstract = "English has become an utterly crucial device to take part in global verbal exchange and competition. 
It is essential to enhance English teaching's flexibility to meet the desires to improve the market economy. Therefore, powerful coaching strategies and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "65", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sonthi:2023:ITH, author = "Vijaya Krishna Sonthi and Nagarajan S. and Krishnaraj N.", title = "An Intelligent {Telugu} Handwritten Character Recognition Using Multi-Objective Mayfly Optimization with Deep Learning-Based {DenseNet} Model", journal = j-TALLIP, volume = "22", number = "3", pages = "67:1--67:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3520439", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3520439", abstract = "The handwritten character recognition process has gained significant attention among research communities due to its application in assistive technologies for visually impaired people, human-robot interaction, automated registry for business documents, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "67", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2023:TED, author = "Shaobo Li and Chengjie Sun and Zhen Xu and Prayag Tiwari and Bingquan Liu and Deepak Gupta and K. 
Shankar and Zhenzhou Ji and Mingjiang Wang", title = "Toward Explainable Dialogue System Using Two-stage Response Generation", journal = j-TALLIP, volume = "22", number = "3", pages = "68:1--68:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3551869", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3551869", abstract = "In recent years, neural networks have achieved impressive performance on dialogue response generation. However, most of these models still suffer from some shortcomings, such as yielding uninformative responses and lacking explainable ability. This \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "68", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2023:RIA, author = "Keliang Chen and Jianming Huang and Qi Zhang and Yansong Cui", title = "Research and Implementation of Automatic Indexing Method of {PDF} for Digital Publishing", journal = j-TALLIP, volume = "22", number = "3", pages = "69:1--69:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3501400", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3501400", abstract = "With the rapid development of mobile Internet technology and artificial intelligence technology, the digital publishing industry is in urgent need of using intelligent technology to change the current way of content production and service. 
Most of the e-book resources owned by publishing enterprises are in PDF format, which is not suitable for reading on mobile devices, and it is not convenient to directly extract key information and construct knowledge graph. With this in mind, this article designs a PDF automatic indexing scheme that can identify all the element information in PDF and output structured data automatically and then extract all the key information in it to generate a keyword library with tag weights. The scheme mainly involves two key technical points: parsing PDF based on text features and grammar rules and extracting keywords based on tag weights. The former visualizes the text block in PDF into a rectangular area, divides the elements by clustering algorithm, and, finally, outputs structured data containing all the information. The latter combines the tags and their weights in the structured data and extracts the keywords in it by the inter-word relation algorithm. The structured data and keywords database produced by this scheme can be used to produce intelligent e-book and build knowledge graph, thus helping publishing enterprises to transform from a content service provider to an intelligent knowledge service provider. This transformation can deeply excavate the core value of the content held by the publishing industry and promote the digitization and intelligentization process of the whole industry.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "69", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Shoaib:2023:CAU, author = "Umar Shoaib and Laiba Fiaz and Chinmay Chakraborty and Hafiz Tayyab Rauf", title = "Context-aware {Urdu} Information Retrieval System", journal = j-TALLIP, volume = "22", number = "3", pages = "70:1--70:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3502854", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3502854", abstract = "World Wide Web (WWW) is playing a vital role for sharing dynamic knowledge in every field of life. The information on web comprises a huge amount of data in different forms such as structured, semi structured, or few is totally in unstructured format. Due \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "70", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2023:RCA, author = "Keliang Chen and Jianming Huang and Yansong Cui and Weizheng Ren", title = "Research on {Chinese} Audio and Text Alignment Algorithm Based on {AIC-FCM} and {Doc2Vec}", journal = j-TALLIP, volume = "22", number = "3", pages = "71:1--71:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3532852", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3532852", abstract = "``Audiobook'' is a multimedia-based reading technology that has emerged in recent years. 
Realizing the alignment of e-book text and book audio is the most important part of its processing. This article describes an audio and text alignment algorithm using \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "71", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nosrati:2023:WRE, author = "Vahid Nosrati and Mohsen Rahmani and Alireza Jolfaei and Sattar Seifollahi", title = "A Weak-Region Enhanced {Bayesian} Classification for Spam Content-Based Filtering", journal = j-TALLIP, volume = "22", number = "3", pages = "72:1--72:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3510420", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3510420", abstract = "This article proposes an improved Bayesian scheme by focusing on the region in which Bayesian may fail to correctly identify labels and improve classification performance by handling those errors. Bayesian method, as a probabilistic classifier, uses Bayes' \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "72", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Xue:2023:IHO, author = "Xingsi Xue and Wenyu Liu", title = "Integrating Heterogeneous Ontologies in {Asian} Languages Through Compact Genetic Algorithm with Annealing Re-sample Inheritance Mechanism", journal = j-TALLIP, volume = "22", number = "3", pages = "73:1--73:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3519298", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3519298", abstract = "An ontology is a state-of-the-art knowledge modeling technique in the natural language domain, which has been widely used to overcome the linguistic barriers in Asian and European countries' intelligent applications. However, due to the different \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "73", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Feng:2023:DLC, author = "Hailin Feng and Shuxuan Xie and Wei Wei and Haibin Lv and Zhihan Lv", title = "Deep Learning in Computational Linguistics for {Chinese} Language Translation", journal = j-TALLIP, volume = "22", number = "3", pages = "74:1--74:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3519386", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3519386", abstract = "Applying artificial intelligence to Chinese language translation in computational linguistics is of practical significance for economic boosts and cultural exchanges. In the present work, the bi-directional long short-term memory (BiLSTM) network is \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "74", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Prabadevi:2023:DMR, author = "B. Prabadevi and N. Deepa and K. 
Ganesan and Gautam Srivastava", title = "A Decision Model for Ranking {Asian} Higher Education Institutes Using an {NLP}-Based Text Analysis Approach", journal = j-TALLIP, volume = "22", number = "3", pages = "75:1--75:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3534562", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3534562", abstract = "Identification of the best institute for higher education has become one of the most challenging issues in the present education system. It has become more complicated as more institutes exist with extraordinary infrastructural facilities. Therefore, a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "75", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2023:DIF, author = "Tao Zhang and Congying Xia and Zhiwei Liu and Shu Zhao and Hao Peng and Philip Yu", title = "Domain-Invariant Feature Progressive Distillation with Adversarial Adaptive Augmentation for Low-Resource Cross-Domain {NER}", journal = j-TALLIP, volume = "22", number = "3", pages = "76:1--76:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3570502", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3570502", abstract = "Considering the expensive annotation in Named Entity Recognition (NER), Cross-domain NER enables NER in low-resource target domains with few or without labeled data, by transferring the knowledge of high-resource domains. 
However, the discrepancy between \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "76", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2023:TBU, author = "Yuzhi Liu and Massimo Piccardi", title = "Topic-Based Unsupervised and Supervised Dictionary Induction", journal = j-TALLIP, volume = "22", number = "3", pages = "77:1--77:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564698", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3564698", abstract = "Word translation is a natural language processing task that provides translation between the words of a source and a target language. As a task, it reduces to the induction of a bilingual dictionary, which is typically performed by aligning word \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "77", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2023:KSL, author = "Chao Li and Xin Yan and Guangyi Xu and Zhongying Deng and Yuanyuan Mo", title = "{Khmer} Sentiment Lexicon Based on {PU} Learning and Label Propagation Algorithm", journal = j-TALLIP, volume = "22", number = "3", pages = "78:1--78:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564697", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3564697", abstract = "The sentiment lexicon is an important tool for natural language processing tasks. In addition to being able to determine the sentiment polarity of words or phrases, it can assist attribute-level, sentence-level, and text-level sentiment analysis tasks. In \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "78", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ma:2023:EIM, author = "Chunpeng Ma and Aili Shen and Hiyori Yoshikawa and Tomoya Iwakura and Daniel Beck and Timothy Baldwin", title = "On the Effectiveness of Images in Multi-modal Text Classification: an Annotation Study", journal = j-TALLIP, volume = "22", number = "3", pages = "79:1--79:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3565572", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3565572", abstract = "Combining different input modalities beyond text is a key challenge for natural language processing. Previous work has been inconclusive as to the true utility of images as a supplementary information source for text classification tasks, motivating this \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "79", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zi:2023:BBS, author = "Yunfei Zi and Shengwu Xiong", title = "{BSML}: Bidirectional Sampling Aggregation-based Metric Learning for Low-resource {Uyghur} Few-shot Speaker Verification", journal = j-TALLIP, volume = "22", number = "3", pages = "80:1--80:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564782", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3564782", abstract = "In recent years, text-independent speaker verification has remained a hot research topic, especially for the limited enrollment and/or test data. At the same time, due to the lack of sufficient training data, the study of low-resource few-shot speaker \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "80", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Rahman:2023:PPB, author = "Habibur Rahman and Md. Rezwan Shahrior Rahin and Araf Mohammad Mahbub and Md. Adnanul Islam and Md. Saddam Hossain Mukta and Md. 
Mahbubur Rahman", title = "Punctuation Prediction in {Bangla} Text", journal = j-TALLIP, volume = "22", number = "3", pages = "81:1--81:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3575804", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3575804", abstract = "Punctuation prediction is critical as it can enhance the readability of machine-transcribed speeches or texts significantly by adding appropriate punctuation. Furthermore, systems like Automatic Speech Recognizer (ASR) produce texts that are unpunctuated, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "81", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tran:2023:BCD, author = "Phuoc Tran and Dat Nguyen and Huu-Anh Tran and Thien Nguyen and Tram Tran", title = "Building a Closed-Domain Question Answering System for a Low-Resource Language", journal = j-TALLIP, volume = "22", number = "3", pages = "82:1--82:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3566123", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3566123", abstract = "In recent years, the Question Answering System (QAS) has been widely used to develop many systems, such as conversation systems, chatbots, and intelligent search. Depending on the amount of information or knowledge that the system processes, the system \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "82", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Muneer:2023:DLB, author = "Iqra Muneer and Ghazeefa Fatima and Muhammad Salman Khan and Rao Muhammad Adeel Nawab and Ali Saeed", title = "Developing a Large Benchmark Corpus for {Urdu} Semantic Word Similarity", journal = j-TALLIP, volume = "22", number = "3", pages = "83:1--83:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3566124", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3566124", abstract = "The semantic word similarity task aims to quantify the degree of similarity between a pair of words. In literature, efforts have been made to create standard evaluation resources to develop, evaluate, and compare various methods for semantic word \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "83", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mahlaza:2023:SRA, author = "Zola Mahlaza and C. Maria Keet", title = "Surface Realization Architecture for Low-resourced {African} Languages", journal = j-TALLIP, volume = "22", number = "3", pages = "84:1--84:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3567594", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3567594", abstract = "There has been growing interest in building surface realization systems to support the automatic generation of text in African languages. 
Such tools focus on converting abstract representations of meaning to a text. Since African languages are low- \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "84", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kchaou:2023:HPB, author = "Sam{\'e}h Kchaou and Rahma Boujelbane and Lamia Hadrich", title = "Hybrid Pipeline for Building {Arabic Tunisian} Dialect-standard {Arabic} Neural Machine Translation Model from Scratch", journal = j-TALLIP, volume = "22", number = "3", pages = "85:1--85:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568674", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3568674", abstract = "Deep Learning is one of the most promising technologies compared to other methods in the context of machine translation. It has been proven to achieve impressive results on large amounts of parallel data for well-endowed languages. Nevertheless, for low- \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "85", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kaur:2023:MSP, author = "Navdeep Kaur and Parminder Singh", title = "Modelling of Speech Parameters of {Punjabi} by Pre-trained Deep Neural Network Using Stacked Denoising Autoencoders", journal = j-TALLIP, volume = "22", number = "3", pages = "86:1--86:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568308", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3568308", abstract = "Statistical parametric speech synthesis techniques such as deep neural network (DNN) and hidden Markov model (HMM) have grown in popularity since last decade over the concatenative speech synthesis approaches by modelling excitation and spectral \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "86", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{S:2023:MNF, author = "Rakesh Kumar S. and Gayathri Nagasubramanian and Muthuramalingam S. 
and Fadi Al-Turjman", title = "Multilingual News Feed Analysis using Intelligent Linguistic Particle Filtering Techniques", journal = j-TALLIP, volume = "22", number = "3", pages = "87:1--87:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3569899", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3569899", abstract = "Analyzing real-time news feeds and their impacts in the real world is a complex task in the social networking arena. Particularly, countries with a multilingual environment have various patterns and perceptions of news reports considering the diversity of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "87", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ma:2023:SCS, author = "Tinghuai Ma and Zheng Zhang and Huan Rong and Najla Al-Nabhan", title = "{SPK-CG}: {Siamese} Network based Posterior Knowledge Selection Model for Knowledge Driven Conversation Generation", journal = j-TALLIP, volume = "22", number = "3", pages = "88:1--88:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3569579", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3569579", abstract = "Building a human-computer conversational system that can communicate with humans is a research hotspot in the field of artificial intelligence. Traditional dialogue systems tend to produce irrelevant and non-information responses, which reduce people's \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. 
Lang. Inf. Process.", articleno = "88", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2023:CGE, author = "Hongfei Wang and Michiki Kurosawa and Satoru Katsumata and Masato Mita and Mamoru Komachi", title = "{Chinese} Grammatical Error Correction Using Pre-trained Models and Pseudo Data", journal = j-TALLIP, volume = "22", number = "3", pages = "89:1--89:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3570209", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3570209", abstract = "In recent studies, pre-trained models and pseudo data have been key factors in improving the performance of the English grammatical error correction (GEC) task. However, few studies have examined the role of pre-trained models and pseudo data in the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "89", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Su:2023:SBT, author = "Xuefeng Su and Ru Li and Xiaoli Li and Baobao Chang and Zhiwei Hu and Xiaoqi Han and Zhichao Yan", title = "A Span-based Target-aware Relation Model for Frame-semantic Parsing", journal = j-TALLIP, volume = "22", number = "3", pages = "90:1--90:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3569581", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3569581", abstract = "Frame-semantic Parsing (FSP) is a challenging and critical task in Natural Language Processing (NLP). Most of the existing studies decompose the FSP task into frame identification (FI) and frame semantic role labeling (FSRL) subtasks, and adopt a pipeline \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "90", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Long:2023:DNN, author = "Kaifang Long and Han Zhao and Zengzhen Shao and Yang Cao and Yanfang Geng and Yintai Sun and Weizhi Xu and Hui Yu", title = "Deep Neural Network with Embedding Fusion for {Chinese} Named Entity Recognition", journal = j-TALLIP, volume = "22", number = "3", pages = "91:1--91:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3570328", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3570328", abstract = "Chinese Named Entity Recognition (NER) is an essential task in natural language processing, and its performance directly impacts the downstream tasks. The main challenges in Chinese NER are the high dependence of named entities on context and the lack of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "91", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yirmibesoglu:2023:MMI, author = "Zeynep Y{\.\i}rm{\.\i}besoglu and Tunga G{\"u}ng{\"o}r", title = "Morphologically Motivated Input Variations and Data Augmentation in {Turkish--English} Neural Machine Translation", journal = j-TALLIP, volume = "22", number = "3", pages = "92:1--92:??", month = mar, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3571073", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri May 19 06:27:21 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3571073", abstract = "Success of neural networks in natural language processing has paved the way for neural machine translation (NMT), which rapidly became the mainstream approach in machine translation. Significant improvement in translation performance has been achieved \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "92", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Alqahtani:2023:SAI, author = "Fatimah Alqahtani and Mischa Dohler", title = "Survey of Authorship Identification Tasks on {Arabic} Texts", journal = j-TALLIP, volume = "22", number = "4", pages = "93:1--93:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564156", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3564156", abstract = "Authorship identification is the process of extracting and analysing the writing styles of authors to identify the authorship. 
From the writing style, the author and his/her different characteristics can be recognised, which is very useful in digital \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "93", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Attar:2023:SAA, author = "Rakesh Kumar Attar and Vishal Goyal and Lalit Goyal", title = "State of the Art of Automation in Sign Language: a Systematic Review", journal = j-TALLIP, volume = "22", number = "4", pages = "94:1--94:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3564769", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3564769", abstract = "Sign language is the fundamental communication language of deaf people. Efforts to develop sign language generation systems can make the life of these people smooth and effortless. Despite the importance of sign language generation systems, there is a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "94", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nassiri:2023:AMR, author = "Naoual Nassiri and Violetta Cavalli-Sforza and Abdelhak Lakhouaja", title = "Approaches, Methods, and Resources for Assessing the Readability of {Arabic} Texts", journal = j-TALLIP, volume = "22", number = "4", pages = "95:1--95:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3571510", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3571510", abstract = "Text readability assessment is a well-known problem that has acquired even more importance in today's information-rich world. In this article, we survey various approaches to measuring and assessing the readability of texts. Our specific goal is to \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "95", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Shammi:2023:CRB, author = "Shumaiya Akter Shammi and Sajal Das and Narayan Ranjan Chakraborty and Sumit Kumar Banshal and Nishu Nath", title = "A Comprehensive Roadmap on {Bangla} Text-based Sentiment Analysis", journal = j-TALLIP, volume = "22", number = "4", pages = "96:1--96:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572783", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3572783", abstract = "The effortless expansion of Internet access has eventually transformed the dissemination behavior toward E-Mode. Thus, the usage of online or, more specifically, ``Digital'' texts has expanded abruptly. ``Bangla,'' the seventh most spoken language globally, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "96", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Naous:2023:ODR, author = "Tarek Naous and Zahraa Bassyouni and Bassel Mousi and Hazem Hajj and Wassim {El Hajj} and Khaled Shaban", title = "Open-Domain Response Generation in Low-Resource Settings using Self-Supervised Pre-Training of Warm-Started Transformers", journal = j-TALLIP, volume = "22", number = "4", pages = "97:1--97:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3579164", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3579164", abstract = "Learning response generation models constitute the main component of building open-domain dialogue systems. However, training open-domain response generation models requires large amounts of labeled data and pre-trained language generation models that are \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "97", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Touma:2023:AGH, author = "Roudy Touma and Hazem Hajj and Wassim El-Hajj and Khaled Shaban", title = "Automated Generation of Human-readable Natural {Arabic} Text from {RDF} Data", journal = j-TALLIP, volume = "22", number = "4", pages = "98:1--98:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3582262", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3582262", abstract = "With the advances in Natural Language Processing (NLP), the industry has been moving towards human-directed artificial intelligence (AI) solutions. Recently, chatbots and automated news generation have captured a lot of attention. The goal is to \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "98", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2023:FPT, author = "Jie Chen and Zhiqiang Yao and Shu Zhao and Yanping Zhang", title = "Fusion Pre-trained Emoji Feature Enhancement for Sentiment Analysis", journal = j-TALLIP, volume = "22", number = "4", pages = "99:1--99:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578582", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3578582", abstract = "Emoji are often used in social media to enrich users' emotions, and they play an important role in the task of social media sentiment analysis. 
In practice, researchers are more likely to consider emoji as special symbols and treat them separately from \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "99", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2023:CAT, author = "Jiangwei Liu and Zian Yan and Sibao Chen and Xiao Sun and Bin Luo", title = "Channel Attention {TextCNN} with Feature Word Extraction for {Chinese} Sentiment Analysis", journal = j-TALLIP, volume = "22", number = "4", pages = "100:1--100:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3571716", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3571716", abstract = "Chinese short text sentiment analysis can help understand society's views on various hot topics. Many existing sentiment analysis methods are based on sentiment dictionaries. Still, sentiment dictionaries are easily affected by subjective factors. They \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "100", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mi:2023:IRL, author = "Chenggang Mi", title = "Improving the Robustness of Loanword Identification in Social Media Texts", journal = j-TALLIP, volume = "22", number = "4", pages = "101:1--101:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572773", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3572773", abstract = "As a potential bilingual resource, loanwords play a very important role in many natural language processing tasks. If loanwords in a low-resource language can be identified effectively, the generated donor-receipt word pairs will benefit many cross- \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "101", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Guo:2023:CSS, author = "Dongyue Guo and Jianwei Zhang and Bo Yang and Yi Lin", title = "A Comparative Study of Speaker Role Identification in Air Traffic Communication Using Deep Learning Approaches", journal = j-TALLIP, volume = "22", number = "4", pages = "102:1--102:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572792", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3572792", abstract = "Automatic spoken instruction understanding (SIU) of the controller-pilot conversations in the air traffic control (ATC) requires not only recognizing the words and semantics of the speech but also determining the role of the speaker. However, few of the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "102", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Duwairi:2023:MVL, author = "Rehab Duwairi and Zain Halloush", title = "A Multi-View Learning Approach for Detecting Personality Disorders Among {Arab} Social Media Users", journal = j-TALLIP, volume = "22", number = "4", pages = "103:1--103:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572906", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3572906", abstract = "Multi-view fusion approaches have gained increasing interest in the past few years by researchers. 
This interest comes due to the many perspectives that datasets can be looked at and evaluated. One of the most urging areas that require constant leveraging \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "103", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Xu:2023:DLR, author = "Wang Xu and Kehai Chen and Tiejun Zhao", title = "Document-Level Relation Extraction with Path Reasoning", journal = j-TALLIP, volume = "22", number = "4", pages = "104:1--104:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3572898", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3572898", abstract = "Document-level relation extraction (DocRE) aims to extract relations among entities across multiple sentences within a document by using reasoning skills (i.e., pattern recognition, logical reasoning, coreference reasoning, etc.) related to the reasoning \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "104", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Di:2023:BDU, author = "Donglin Di and Xianyang Song and Weinan Zhang and Yue Zhang and Fanglin Wang", title = "Building Dialogue Understanding Models for Low-resource Language {Indonesian} from Scratch", journal = j-TALLIP, volume = "22", number = "4", pages = "105:1--105:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3575803", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3575803", abstract = "Using off-the-shelf resources from resource-rich languages to transfer knowledge to low-resource languages has received a lot of attention. The requirements of enabling the model to achieve the reliable performance, including the scale of required \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "105", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Balakrishnan:2023:TOL, author = "Vimala Balakrishnan and Vithyatheri Govindan and Kumanan N. 
Govaichelvan", title = "{Tamil} Offensive Language Detection: Supervised versus Unsupervised Learning Approaches", journal = j-TALLIP, volume = "22", number = "4", pages = "106:1--106:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3575860", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3575860", abstract = "Studies on natural language processing are mainly conducted in English, with very few exploring languages that are under-resourced, including the Dravidian languages. We present a novel work in detecting offensive language using a corpus collected from \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "106", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Berrimi:2023:AMA, author = "Mohamed Berrimi and Mourad Oussalah and Abdelouahab Moussaoui and Mohamed Saidi", title = "Attention Mechanism Architecture for {Arabic} Sentiment Analysis", journal = j-TALLIP, volume = "22", number = "4", pages = "107:1--107:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578265", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3578265", abstract = "This article tackles the problem of sentiment analysis in the Arabic language where a new deep learning model has been put forward. The proposed model uses a hybrid bidirectional gated recurrent unit (BiGRU) and bidirectional long short-term memory \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "107", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ghosal:2023:HUH, author = "Sayani Ghosal and Amita Jain", title = "{HateCircle} and Unsupervised Hate Speech Detection Incorporating Emotion and Contextual Semantics", journal = j-TALLIP, volume = "22", number = "4", pages = "108:1--108:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3576913", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3576913", abstract = "The explosive growth of social media has fueled an extensive increase in online freedom of speech. The worldwide platform of human voice creates possibilities to assail other users without facing any consequences, and flout social etiquettes, resulting in \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "108", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2023:LRN, author = "Yinqiao Li and Runzhe Cao and Qiaozhi He and Tong Xiao and Jingbo Zhu", title = "Learning Reliable Neural Networks with Distributed Architecture Representations", journal = j-TALLIP, volume = "22", number = "4", pages = "109:1--109:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578709", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3578709", abstract = "Neural architecture search (NAS) has shown the strong performance of learning neural models automatically in recent years. 
But most NAS systems are unreliable due to the architecture gap brought by discrete representations of atomic architectures. In this \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "109", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tang:2023:NVG, author = "Yi-Kun Tang and Heyan Huang and Xuewen Shi and Xian-Ling Mao", title = "Neural Variational {Gaussian} Mixture Topic Model", journal = j-TALLIP, volume = "22", number = "4", pages = "110:1--110:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578583", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3578583", abstract = "Neural variational inference-based topic modeling has gained great success in mining abstract topics from documents. However, these topic models usually mainly focus on optimizing the topic proportions for documents, while the quality and the internal \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "110", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yu:2023:BSD, author = "Wei Yu and Haiyan Yang and Mengzhu Wang and Xiaodong Wang", title = "Bravely Say {I} Don't Know: Relational Question-Schema Graph for Text-to-{SQL} Answerability Classification", journal = j-TALLIP, volume = "22", number = "4", pages = "111:1--111:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3579030", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3579030", abstract = "Recently, the Text-to-SQL task has received much attention. Many sophisticated neural models have been invented that achieve significant results. Most current work assumes that all the inputs are legal and the model should generate an SQL query for any \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "111", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mishra:2023:DCB, author = "Santosh Kumar Mishra and Sushant Sinha and Sriparna Saha and Pushpak Bhattacharyya", title = "Dynamic Convolution-based Encoder-Decoder Framework for Image Captioning in {Hindi}", journal = j-TALLIP, volume = "22", number = "4", pages = "112:1--112:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3573891", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3573891", abstract = "In sequence-to-sequence modeling tasks, such as image captioning, machine translation, and visual question answering, encoder-decoder architectures are state of the art. An encoder, convolutional neural network (CNN) encodes input images into fixed \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "112", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wanjawa:2023:KQA, author = "Barack W. Wanjawa and Lilian D. A. 
Wanzare and Florence Indede and Owen Mconyango and Lawrence Muchemi and Edward Ombui", title = "{KenSwQuAD} --- a Question Answering Dataset for {Swahili} Low-resource Language", journal = j-TALLIP, volume = "22", number = "4", pages = "113:1--113:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578553", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3578553", abstract = "The need for question-answering (QA) datasets in low-resource languages is the motivation of this research, leading to the development of the Kencorpus Swahili Question Answering Dataset (KenSwQuAD). This dataset is annotated from raw story texts of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "113", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Coban:2023:DCD, author = "Onder Coban and Selma Ayse Ozel and Ali Inan", title = "Detection and Cross-domain Evaluation of Cyberbullying in {Facebook} Activity Contents for {Turkish}", journal = j-TALLIP, volume = "22", number = "4", pages = "114:1--114:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580393", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3580393", abstract = "Cyberbullying refers to bullying and harassment of defenseless or vulnerable people such as children, teenagers, and women through any means of communication (e.g., e-mail, text messages, wall posts, tweets) over any online medium (e.g., social media, \ldots{}).", acknowledgement = ack-nhfb, ajournal = "ACM 
Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "114", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Akhter:2023:HPO, author = "Shaheera Saba Mohd Naseem Akhter and Priti P. Rege", title = "Hyper Parameter Optimization of {CRNN} for Printed {Devanagari} Script Recognition using {Taguchi}'s Method", journal = j-TALLIP, volume = "22", number = "4", pages = "115:1--115:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578549", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3578549", abstract = "The Devanagari script is one of the most widely used scripts worldwide. The existing deep learning-based optical character recognition system for printed Devanagari scripts using Convolutional Neural Network --- Recurrent Neural Network, or CRNN is not \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "115", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Toraman:2023:ITL, author = "Cagri Toraman and Eyup Halit Yilmaz and Furkan Sah{\.\i}nu{\c{c}} and Oguzhan Ozcelik", title = "Impact of Tokenization on Language Models: an Analysis for {Turkish}", journal = j-TALLIP, volume = "22", number = "4", pages = "116:1--116:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3578707", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3578707", abstract = "Tokenization is an important text preprocessing step to prepare input tokens for deep language models. WordPiece and BPE are de facto methods employed by important models, such as BERT and GPT. However, the impact of tokenization can be different for \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "116", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sebastian:2023:MNL, author = "Mary Priya Sebastian and Santhosh Kumar G.", title = "{Malayalam} Natural Language Processing: Challenges in Building a Phrase-Based Statistical Machine Translation System", journal = j-TALLIP, volume = "22", number = "4", pages = "117:1--117:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3579163", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3579163", abstract = "Statistical Machine Translation (SMT) is a preferred Machine Translation approach to convert the text in a specific language into another by automatically learning translations using a parallel corpus. SMT has been successful in producing quality \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "117", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2023:MTL, author = "Xuelei Wang and Xirong Xu and Degen Huang and Ting Zhang", title = "Multi-task Label-wise Transformer for {Chinese} Named Entity Recognition", journal = j-TALLIP, volume = "22", number = "4", pages = "118:1--118:??", month = apr, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3576025", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Jun 1 14:20:55 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3576025", abstract = "Benefiting from the improvement of positional encoding and the introduction of lexical knowledge, Transformer has achieved superior performance than the prevailing BiLSTM-based models in named entity recognition (NER) task. However, existing Transformer- \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.",
  articleno =    "118",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Xu:2023:STC,
  author =       "Qianqian Xu and Junjie Peng and Cangzhi Zheng and Shuhua Tan and Fen Yi and Feng Cheng",
  title =        "Short Text Classification of {Chinese} with Label Information Assisting",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "4",
  pages =        "119:1--119:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3582301",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Jun 1 14:20:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3582301",
  abstract =     "As a common language form in oral communication, short text is hard to be used in the applications such as intent understanding, text classification and so on due to its limited content and information, as well as irregular expression and missing \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "119",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Jha:2023:FEV,
  author =       "Piyush Jha and Rashi Kumar and Vineet Sahula",
  title =        "Filtering and Extended Vocabulary based Translation for Low-resource Language Pair of {Sanskrit--Hindi}",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "4",
  pages =        "120:1--120:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3580495",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Jun 1 14:20:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3580495",
  abstract =     "Neural Machine Translation (NMT) is widely employed for language translation tasks because it performs better than the conventional statistical and phrase-based approaches. However, NMT techniques involve challenges, such as requiring a large and clean \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "120",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Hafeez:2023:USP,
  author =       "Hamza Hafeez and Iqra Muneer and Muhammad Sharjeel and Muhammad Adnan Ashraf and Rao Muhammad Adeel Nawab",
  title =        "{Urdu} Short Paraphrase Detection at Sentence Level",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "4",
  pages =        "121:1--121:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3586009",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Jun 1 14:20:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3586009",
  abstract =     "Paraphrase detection systems uncover the relationship between two text fragments and classify them as paraphrased when they convey the same idea; otherwise non-paraphrased. Previously, the researchers have mainly focused on developing resources for the \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "121",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2023:LCD,
  author =       "Xiangyu Wang and Chengqing Zong",
  title =        "Learning Category Distribution for Text Classification",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "4",
  pages =        "122:1--122:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3585279",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Jun 1 14:20:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3585279",
  abstract =     "Label smoothing has a wide range of applications in the machine learning field.
Nonetheless, label smoothing only softens the targets by adding a uniform distribution into a one-hot vector, which cannot truthfully reflect the underlying relations among \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "122",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Yan:2023:CLB,
  author =       "Jinghui Yan and Chengqing Zong and Jinan Xu",
  title =        "Combination of Loss-based Active Learning and Semi-supervised Learning for Recognizing Entities in {Chinese} Electronic Medical Records",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "123:1--123:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3588314",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3588314",
  abstract =     "The recognition of entities in an electronic medical record (EMR) is especially important to downstream tasks, such as clinical entity normalization and medical \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "123",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Lv:2023:GDA,
  author =       "Qi Lv and Ziqiang Cao and Lei Geng and Chunhui Ai and Xu Yan and Guohong Fu",
  title =        "General and Domain-adaptive {Chinese} Spelling Check with Error-consistent Pretraining",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "124:1--124:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3564271",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564271",
  abstract =     "The lack of label data is one of the significant bottlenecks for Chinese Spelling Check. Existing researches use the automatic generation method by exploiting \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "124",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2023:BLN,
  author =       "Shuheng Wang and Heyan Huang and Shumin Shi",
  title =        "Better Localness for Non-Autoregressive Transformer",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "125:1--125:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3587266",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3587266",
  abstract =     "The Non-Autoregressive Transformer, due to its low inference latency, has attracted much attention from researchers. Although, the performance of the \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "125",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Shi:2023:ART,
  author =       "Xuewen Shi and Heyan Huang and Ping Jian and Yi-Kun Tang",
  title =        "Approximating to the Real Translation Quality for Neural Machine Translation via Causal Motivated Methods",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "126:1--126:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3583684",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3583684",
  abstract =     "It is hard to evaluate translations objectively and accurately, which limits the applications of machine translation. In this article, we assume that the above \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "126",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Biltawi:2023:ASE,
  author =       "Mariam M. Biltawi and Arafat Awajan and Sara Tedmori",
  title =        "{Arabic} Span Extraction-based Reading Comprehension Benchmark {(ASER)} and Neural Baseline Models",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "127:1--127:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3579047",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3579047",
  abstract =     "Machine reading comprehension (MRC) requires machines to read and answer questions about a given text.
This can be achieved through either predicting \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "127",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{M:2023:TGT,
  author =       "Diviya M. and Karmel A.",
  title =        "{TAM GAN}: {Tamil} Text to Naturalistic Image Synthesis Using Conventional Deep Adversarial Networks",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "128:1--128:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3584019",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3584019",
  abstract =     "Text-to-image synthesis has advanced recently as a prospective area for improvement in computer vision applications. The image synthesis \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "128",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2023:OOS,
  author =       "Yuanxia Liu and Tianyong Hao and Hai Liu and Yuanyuan Mu and Heng Weng and Fu Lee Wang",
  title =        "{OdeBERT}: One-stage Deep-supervised Early-exiting {BERT} for Fast Inference in User Intent Classification",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "129:1--129:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3587464",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3587464",
  abstract =     "User intent classification is a vital task for analyzing users' essential requirements from the users' input query in information retrieval systems, question \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "129",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Srivastava:2023:ESI,
  author =       "Gautam Srivastava and Jerry Chun-Wei Lin and Yu-Dong Zhang",
  title =        "Editorial for the Special Issue on Computational Linguistics Processing in Low-Resource Indigenous Languages",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "130:1--130:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3591208",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3591208",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "130",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Bashir:2023:CAE,
  author =       "Muhammad Farrukh Bashir and Abdul Rehman Javed and Muhammad Umair Arshad and Thippa Reddy Gadekallu and Waseem Shahzad and Mirza Omer Beg",
  title =        "Context-aware Emotion Detection from Low-resource {Urdu} Language Using Deep Neural Network",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "131:1--131:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3528576",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3528576",
  abstract =     "Emotion detection (ED) plays a vital role in determining individual interest in any field. Humans use gestures, facial expressions, and voice pitch and choose words to \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "131",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Manjari:2023:QQE,
  author =       "Kanak Manjari and Madhushi Verma and Gaurav Singal and Suyel Namasudra",
  title =        "{QEST}: Quantized and Efficient Scene Text Detector Using Deep Learning",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "132:1--132:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3526217",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3526217",
  abstract =     "Scene text detection is complicated and one of the most challenging tasks due to different environmental restrictions, such as illuminations, lighting conditions, \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "132",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kumar:2023:HDL,
  author =       "Akshi Kumar and Saurabh Raj Sangwan and Adarsh Kumar Singh and Gandharv Wadhwa",
  title =        "Hybrid Deep Learning Model for Sarcasm Detection in {Indian} Indigenous Language Using Word-Emoji Embeddings",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "133:1--133:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3519299",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3519299",
  abstract =     "Automated sarcasm detection is deemed as a complex natural language processing task and extending it to a morphologically-rich and free-order dominant \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "133",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Chopra:2023:FOH,
  author =       "Abhishek Chopra and Deepak Kumar Sharma and Aashna Jha and Uttam Ghosh",
  title =        "A Framework for Online Hate Speech Detection on Code-mixed {Hindi-English} Text and {Hindi} Text in {Devanagari}",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "134:1--134:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3568673",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3568673",
  abstract =     "Social Media has been growing and has provided the world with a platform to opine, debate, display, and discuss like never before.
It has a major influence in \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "134",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wu:2023:JID,
  author =       "Yirui Wu and Hao Li and Lilai Zhang and Chen Dong and Qian Huang and Shaohua Wan",
  title =        "Joint Intent Detection Model for Task-oriented Human-Computer Dialogue System using Asynchronous Training",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "135:1--135:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3558096",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3558096",
  abstract =     "How to accurately understand low-resource languages is the core of the task-oriented human-computer dialogue system. Language understanding consists \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "135",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Chakraborty:2023:EOB,
  author =       "Chinmay Chakraborty and Shaohua Wan and Mohammad R.
Khosravi",
  title =        "Editorial: Ontology-based Knowledge Presentation and Computational Linguistics for Semantic Big Social Data Analytics in {Asian} Social Networks",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "136:1--136:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3594719",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3594719",
  abstract =     "Data-driven ontology-based knowledge (OK) presentation and computational linguistics for evolving semantic Asian social networks (ASNs) can make one of \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "136",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Singh:2023:SNA,
  author =       "Shashank Sheshar Singh and Vishal Srivastava and Ajay Kumar and Shailendra Tiwari and Dilbag Singh and Heung-No Lee",
  title =        "Social Network Analysis: a Survey on Measure, Structure, Language Information Analysis, Privacy, and Applications",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "137:1--137:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3539732",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3539732",
  abstract =     "The rapid growth in popularity of online social networks provides new opportunities in computer science, sociology, math, information studies, biology, \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "137",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Anwar:2023:SRA,
  author =       "Sibgha Anwar and Mirza Omer Beg and Kiran Saleem and Zeeshan Ahmed and Abdul Rehman Javed and Usman Tariq",
  title =        "Social Relationship Analysis Using State-of-the-art Embeddings",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "138:1--138:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3539608",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3539608",
  abstract =     "Detection of human relationships from their interactions on social media is a challenging problem with a wide range of applications in different areas, like targeted \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "138",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Sharma:2023:SML,
  author =       "Neha Sharma and Mukesh Soni and Sumit Kumar and Rajeev Kumar and Nabamita Deb and Anurag Shrivastava",
  title =        "Supervised Machine Learning Method for Ontology-based Financial Decisions in the Stock Market",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "139:1--139:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3554733",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3554733",
  abstract =     "For changing semantics, ontological and information presentation, as well as computational linguistics for Asian social networks, are one of the most \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "139",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Jiang:2023:GVS,
  author =       "Wei Jiang and Mengqi Li and Mohammad Shabaz and Ashutosh Sharma and Mohd Anul Haq",
  title =        "Generation of Voice Signal Tone Sandhi and Melody Based on Convolutional Neural Network",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "140:1--140:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3545569",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3545569",
  abstract =     "There is a need to prevent the use of modulated voice signals to conduct criminal activities.
Voice signal change detection based on convolutional neural \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "140",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Bibi:2023:RCR,
  author =       "Nazia Bibi and Tauseef Rana and Ayesha Maqbool and Tamim Alkhalifah and Wazir Zada Khan and Ali Kashif Bashir and Yousaf {Bin Zikria}",
  title =        "Reusable Component Retrieval: a Semantic Search Approach for Low-Resource Languages",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "141:1--141:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3564604",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564604",
  abstract =     "A common practice among programmers is to reuse existing code, accomplished by performing natural language queries through search engines. The main aim of \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "141",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Liao:2023:IRA,
  author =       "Junwei Liao and Sefik Eskimez and Liyang Lu and Yu Shi and Ming Gong and Linjun Shou and Hong Qu and Michael Zeng",
  title =        "Improving Readability for Automatic Speech Recognition Transcription",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "142:1--142:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3557894",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3557894",
  abstract =     "Modern Automatic Speech Recognition (ASR) systems can achieve high performance in terms of recognition accuracy. However, a perfectly accurate transcript still \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "142",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Chen:2023:UBH,
  author =       "Qian Chen and Xiao Sun and Jiamin Wang and Meng Wang",
  title =        "User-based Hierarchical Network of {Sina Weibo} Emotion Analysis",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "143:1--143:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3579048",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3579048",
  abstract =     "Emotion analysis on Sina Weibo has a great impetus for government agencies to survey public opinion and enterprises to track market demand.
Most of the \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "143",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Pei:2023:SGS,
  author =       "Jiaming Pei and Kaiyang Zhong and Zhi Yu and Lukun Wang and Kuruva Lakshmanna",
  title =        "Scene Graph Semantic Inference for Image and Text Matching",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "144:1--144:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3563390",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3563390",
  abstract =     "With the rapid development of information technology, image and text data have increased dramatically. Image and text matching techniques enable \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "144",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Liang:2023:WDD,
  author =       "Kun Liang and Ruhui Ma and Yang Hua and Hao Wang and Ningxin Hu and Tao Song and Honghao Gao and Haibing Guan",
  title =        "{WH$^2$D$^2$N$^2$}: Distributed {AI}-enabled {OK-ASN} Service for {Web of Things}",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "145:1--145:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3564242",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564242",
  abstract =     "Model data-driven ontology and knowledge presentation for evolving semantic Asian social networks (OK-ASN) is a critical strategy for web of things (WoT) \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "145",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Wu:2023:CMP,
  author =       "Zongda Wu and Jian Xie and Shigen Shen and Chongze Lin and Guandong Xu and Enhong Chen",
  title =        "A Confusion Method for the Protection of User Topic Privacy in {Chinese} Keyword-based Book Retrieval",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "146:1--146:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3571731",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3571731",
  abstract =     "In this article, aiming at a Chinese keyword-based book search service, from a technological perspective, we propose to modify a user query sequence \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "146",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Belhadi:2023:FAF,
  author =       "Asma Belhadi and Youcef Djenouri and Gautam Srivastava and Jerry Chun-Wei Lin",
  title =        "Fast and Accurate Framework for Ontology Matching in {Web of Things}",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "147:1--147:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3578708",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3578708",
  abstract =     "The Web of Things (WoT) can help with knowledge discovery and interoperability issues in many Internet of Things (IoT) applications.
This article focuses \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "147",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Berhoum:2023:IAB,
  author =       "Adel Berhoum and Mohammed Charaf Eddine Meftah and Abdelkader Laouid and Mohammad Hammoudeh",
  title =        "An Intelligent Approach Based on Cleaning up of Inutile Contents for Extremism Detection and Classification in Social Networks",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "148:1--148:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3575802",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3575802",
  abstract =     "Extremism is a growing threat worldwide that presents a significant danger to public safety and national security. Social networks provide extremists with spaces to \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "148",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Deng:2023:CFA,
  author =       "Xiaoheng Deng and Dingjie Han and Ping Jiang",
  title =        "A Context-focused Attention Evolution Model for Aspect-based Sentiment Classification",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "149:1--149:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3587465",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3587465",
  abstract =     "Due to their inherent capability in the semantic alignment of aspects and their context words, Attention and Long-Short-Term-Memory (LSTM) \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "149",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kumar:2023:OLD,
  author =       "Sanjay Kumar and Akshi Kumar and Abhishek Mallik and Sakshi Dhall",
  title =        "Opinion Leader Detection in {Asian} Social Networks using Modified Spider Monkey Optimization",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "5",
  pages =        "150:1--150:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3555311",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Jun 9 06:53:47 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3555311",
  abstract =     "The Asian social networks are dominated by the society's collectivist culture, and this interestingly introduces an influence mechanism aided by \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang.
Inf. Process.", articleno = "150", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumar:2023:PDU, author = "Akshi Kumar and Rohit Beniwal and Dipika Jain", title = "Personality Detection using Kernel-based Ensemble Model for Leveraging Social Psychology in Online Networks", journal = j-TALLIP, volume = "22", number = "5", pages = "151:1--151:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3571584", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Jun 9 06:53:47 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3571584", abstract = "The Asian social networking market dominates the world landscape with the highest consumer penetration rate. Businesses and investors often \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "151", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2023:MSF, author = "Yuanpeng Zhang and Yizhang Jiang and Alireza Jolfaei", title = "Mutual Supervised Fusion \& Transfer Learning with Interpretable Linguistic Meaning for Social Data Analytics", journal = j-TALLIP, volume = "22", number = "5", pages = "152:1--152:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3568675", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Jun 9 06:53:47 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3568675", abstract = "Social data analytics is often taken as the most commonly used method for community discovery, product recommendations, knowledge graph, and so on.
In \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "152", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chauhan:2023:RBF, author = "Shweta Chauhan and Jayashree Premkumar Shet and Shehab Mohamed Beram and Vishal Jagota and Mohammed Dighriri and Mohd Wazih Ahmad and Md Shamim Hossain and Ali Rizwan", title = "Rule Based Fuzzy Computing Approach on Self-Supervised Sentiment Polarity Classification with Word Sense Disambiguation in Machine Translation for {Hindi} Language", journal = j-TALLIP, volume = "22", number = "5", pages = "153:1--153:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3574130", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Jun 9 06:53:47 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3574130", abstract = "With increasing globalization, communication among people of diverse cultural backgrounds is also taking place to a very large extent in the present \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "153", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jin:2023:SPT, author = "Dawei Jin and Yiyi Hu and Jingyu Chen and Mengran Xia", title = "Stock Price Trends Prediction Based on the Classical Models with Key Information Fusion of Ontologies", journal = j-TALLIP, volume = "22", number = "5", pages = "154:1--154:??", month = may, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3592599", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Jun 9 06:53:47 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3592599", abstract = "An ontology of the financial field can support effective association and integration of financial knowledge. Based on behavioral finance, social media is \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "154", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Dey:2023:DBO, author = "Raghunath Dey and Rakesh Chandra Balabantaray", title = "Development of a Benchmark {Odia} Handwritten Character Database for an Efficient Offline Handwritten Character Recognition with a Chronological Survey", journal = j-TALLIP, volume = "22", number = "6", pages = "155:1--155:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3583988", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3583988", abstract = "A good benchmark dataset is a primary requirement in the offline handwritten character recognition (HCR) process. 
Only three handwritten numerals and alphabet datasets from Odia are publicly accessible for study, although many writers have used several \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "155", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Saeed:2023:DOL, author = "Ramsha Saeed and Hammad Afzal and Sadaf Abdul Rauf and Naima Iltaf", title = "Detection of Offensive Language and Its Severity for Low Resource Language", journal = j-TALLIP, volume = "22", number = "6", pages = "156:1--156:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3580476", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3580476", abstract = "Continuous proliferation of hate speech in different languages on social media has drawn significant attention from researchers in the past decade. Detecting hate speech is indispensable irrespective of the scale of use of language, as it inflicts huge \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "156", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Huang:2023:CAT, author = "Xin Huang and Jiajun Zhang and Chengqing Zong", title = "Contrastive Adversarial Training for Multi-Modal Machine Translation", journal = j-TALLIP, volume = "22", number = "6", pages = "157:1--157:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3587267", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3587267", abstract = "The multi-modal machine translation task is to improve translation quality with the help of additional visual input. It is expected to disambiguate or complement semantics while there are ambiguous words or incomplete expressions in the sentences. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "157", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jia:2023:TMA, author = "Yue Jia and Wei Fang and Heng-Yang Lu", title = "Think More Ambiguity Less: a Novel Dual Interactive Model with Local and Global Semantics for {Chinese} Named Entity Recognition", journal = j-TALLIP, volume = "22", number = "6", pages = "158:1--158:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3583685", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3583685", abstract = "Chinese is a representative East Asian language. Chinese Named Entity Recognition (CNER) aims to recognize various entities. 
It is significant for other NLP tasks to utilize CNER. Recent research to develop CNER systems has been dedicated to either \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "158", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Huang:2023:KEP, author = "Hu Huang and Bowen Zhang and Yangyang Li and Baoquan Zhang and Yuxi Sun and Chuyao Luo and Cheng Peng", title = "Knowledge-enhanced Prompt-tuning for Stance Detection", journal = j-TALLIP, volume = "22", number = "6", pages = "159:1--159:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3588767", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3588767", abstract = "Investigating public attitudes on social media is important in opinion mining systems. Stance detection aims to analyze the attitude of an opinionated text (e.g., favor, neutral, or against) toward a given target. Existing methods mainly address this \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "159", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhao:2023:BBF, author = "Feng Zhao and Cheng Yan and Hai Jin and Lifang He", title = "{BayesKGR}: {Bayesian} Few-Shot Learning for Knowledge Graph Reasoning", journal = j-TALLIP, volume = "22", number = "6", pages = "160:1--160:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589183", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3589183", abstract = "Reasoning over knowledge graphs (KGs) has received increasing attention recently due to its promising applications in many areas, such as semantic search and recommendation systems. Subsequently, most reasoning models are inherently transductive and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "160", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Das:2023:ITM, author = "Ringki Das and Thoudam Doren Singh", title = "Image-Text Multimodal Sentiment Analysis Framework of {Assamese} News Articles Using Late Fusion", journal = j-TALLIP, volume = "22", number = "6", pages = "161:1--161:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3584861", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3584861", abstract = "Before the arrival of the web as a corpus, people detected positive and negative news based on the understanding of the textual content from physical newspaper rather than an automatic identification approach from readily available e-newspapers. Thus, the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "161", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Munir:2023:SSS, author = "Kashif Munir and Hai Zhao and Zuchao Li", title = "Semi-Supervised Semantic Role Labeling with Bidirectional Language Models", journal = j-TALLIP, volume = "22", number = "6", pages = "162:1--162:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3587160", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3587160", abstract = "The recent success of neural networks in NLP applications has provided a strong impetus to develop supervised models for semantic role labeling (SRL) that forego the requirement for extensive feature engineering. Recent state-of-the-art approaches require \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "162", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yi:2023:IRP, author = "Nian Yi and Chenze Shao and Aishan Wumaier", title = "Integrating Reconstructor and Post-{Editor} into Neural Machine Translation", journal = j-TALLIP, volume = "22", number = "6", pages = "163:1--163:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3588766", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3588766", abstract = "Neural machine translation (NMT) mainly comprises the encoder and decoder. The encoder is mainly used to extract the feature vector of the source language sentence. 
The decoder predicts the next token according to the feature vector extracted by the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "163", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Parte:2023:EAD, author = "Smita Athanere Parte and Ankur Ratmele and Ritesh Dhanare", title = "An Efficient and Accurate Detection of Fake News Using Capsule Transient Auto Encoder", journal = j-TALLIP, volume = "22", number = "6", pages = "164:1--164:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589184", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3589184", abstract = "Fake news is ``news reports that are deliberatively and indisputably fake.'' News that uses fake information is becoming a threat. It becomes challenging for humans to distinguish between fake and actual news. It has become necessary to detect fake news, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "164", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sharma:2023:LLF, author = "Richa Sharma and Arti Arya", title = "{LFWE}: Linguistic Feature Based Word Embedding for {Hindi} Fake News Detection", journal = j-TALLIP, volume = "22", number = "6", pages = "165:1--165:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589764", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3589764", abstract = "It is essential for research communities to investigate ways for authenticating news. The use of linguistic feature based analysis to automatically detect false news is gaining popularity among the scientific community. However, such techniques are \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "165", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Thin:2023:VSA, author = "Dang Van Thin and Duong Ngoc Hao and Ngan Luu-Thuy Nguyen", title = "{Vietnamese} Sentiment Analysis: an Overview and Comparative Study of Fine-tuning Pretrained Language Models", journal = j-TALLIP, volume = "22", number = "6", pages = "166:1--166:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589131", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3589131", abstract = "Sentiment Analysis (SA) is one of the most active research areas in the Natural Language Processing (NLP) field due to its potential for business and society. With the development of language representation models, numerous methods have shown promising \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "166", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Dalai:2023:PST, author = "Tusarkanta Dalai and Tapas Kumar Mishra and Pankaj K. 
Sa", title = "Part-of-Speech Tagging of {Odia} Language Using Statistical and Deep Learning Based Approaches", journal = j-TALLIP, volume = "22", number = "6", pages = "167:1--167:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3588900", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3588900", abstract = "Automatic part-of-speech (POS) tagging is a preprocessing step of many natural language processing tasks, such as named entity recognition, speech processing, information extraction, word sense disambiguation, and machine translation. It has already \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "167", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Singh:2023:KHN, author = "Niraj Kumar Singh and Komal Naaz and Soubhik Chakraborty", title = "{Komala} and {Ka{\d{t}}hora}: a Novel Approach Towards Classification of {Hindi} Poetry", journal = j-TALLIP, volume = "22", number = "6", pages = "168:1--168:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589249", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3589249", abstract = "Literary compositions are very often analyzed using various constituent units like words, phrases, sentences, and paragraphs. Unlike the conventional research that focuses on the aforementioned constituent units, our task is a statistical effort carried \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "168", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Das:2023:IMN, author = "Sudhansu Bala Das and Atharv Biradar and Tapas Kumar Mishra and Bidyut Kr. Patra", title = "Improving Multilingual Neural Machine Translation System for {Indic} Languages", journal = j-TALLIP, volume = "22", number = "6", pages = "169:1--169:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3587932", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3587932", abstract = "The Machine Translation System (MTS) serves as an effective tool for communication by translating text or speech from one language to another language. Recently, neural machine translation (NMT) has become popular for its performance and cost-effectiveness. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "169", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Khanmohammadi:2023:PBT, author = "Reza Khanmohammadi and Mitra Sadat Mirshafiee and Yazdan Rezaee Jouryabi and Seyed Abolghasem Mirroshandel", title = "{Prose2Poem}: The Blessing of Transformers in Translating Prose to {Persian} Poetry", journal = j-TALLIP, volume = "22", number = "6", pages = "170:1--170:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3592791", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3592791", abstract = "Persian poetry has consistently expressed its philosophy, wisdom, speech, and rationale based on its couplets, making it an enigmatic language on its own to both native and non-native speakers. Nevertheless, the noticeable gap between Persian prose and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "170", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yang:2023:TTE, author = "Liang Yang and Zhexu Shen and Fengqing Zhou and Hongfei Lin and Junpeng Li", title = "{TPoet}: Topic-Enhanced {Chinese} Poetry Generation", journal = j-TALLIP, volume = "22", number = "6", pages = "171:1--171:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3593805", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3593805", abstract = "Chinese poetry generation has been a challenging part of natural language processing due to the unique literariness and aesthetics of poetry. In most cases, the content of poetry is topic related. In other words, specific thoughts or emotions are usually \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "171", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Shamas:2023:MML, author = "Mohsen Shamas and Wassim {El Hajj} and Hazem Hajj and Khaled Shaban", title = "Metadial: a Meta-learning Approach for {Arabic} Dialogue Generation", journal = j-TALLIP, volume = "22", number = "6", pages = "172:1--172:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3590960", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3590960", abstract = "Dialogue generation is the automatic generation of a text response, given a user's input. 
Dialogue generation for low-resource languages has been a challenging task for researchers. However, the advancements in deep learning models have made developing \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "172", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sharjeel:2023:CLT, author = "Muhammad Sharjeel and Iqra Muneer and Sumaira Nosheen and Rao Muhammad Adeel Nawab and Paul Rayson", title = "Cross-lingual Text Reuse Detection at Document Level for {English--Urdu} Language Pair", journal = j-TALLIP, volume = "22", number = "6", pages = "173:1--173:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3592761", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3592761", abstract = "In recent years, the problem of Cross-Lingual Text Reuse Detection (CLTRD) has gained the interest of the research community due to the availability of large digital repositories and automatic Machine Translation (MT) systems. These systems are readily \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "173", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2023:ERV, author = "Fan Zhang and Meishan Zhang and Shuang Liu and Yueheng Sun and Nan Duan", title = "Enhancing {RDF} Verbalization with Descriptive and Relational Knowledge", journal = j-TALLIP, volume = "22", number = "6", pages = "174:1--174:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3595293", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3595293", abstract = "RDF verbalization has received increasing interest, which aims to generate a natural language description of the knowledge base. Sequence-to-sequence models based on Transformer are able to obtain strong performance equipped with pre-trained language \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "174", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Shafi:2023:STU, author = "Jawad Shafi and Rao Muhammad Adeel Nawab and Paul Rayson", title = "Semantic Tagging for the {Urdu} Language: Annotated Corpus and Multi-Target Classification Methods", journal = j-TALLIP, volume = "22", number = "6", pages = "175:1--175:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3582496", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3582496", abstract = "Extracting and analysing meaning-related information from natural language data has attracted the attention of researchers in various fields, such as natural language processing, corpus linguistics, information retrieval, and data science. An important \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "175", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Huang:2023:CLS, author = "Yuxin Huang and Yin Liang and Zhaoyuan Wu and Enchang Zhu and Zhengtao Yu", title = "Cross-lingual Sentence Embedding for Low-resource {Chinese--Vietnamese} Based on Contrastive Learning", journal = j-TALLIP, volume = "22", number = "6", pages = "176:1--176:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3589341", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3589341", abstract = "Cross-lingual sentence embedding's goal is mapping sentences with similar semantics but in different languages close together and dissimilar sentences farther apart in the representation space. It is the basis of many downstream tasks such as cross- \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "176", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liao:2023:TPC, author = "Junwei Liao and Shuai Cheng and Minghuan Tan", title = "Text Polishing with {Chinese} Idiom: Task, Datasets and Pre-trained Baselines", journal = j-TALLIP, volume = "22", number = "6", pages = "177:1--177:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3593806", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3593806", abstract = "This work presents the task of text polishing, which generates a sentence that is more graceful than the input sentence while retaining its semantic meaning. Text polishing has great value in real usage and is an important component in modern writing \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "177", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Khellas:2023:ARD, author = "Kenza Khellas and Rachid Seghir", title = "{Alabib-65}: a Realistic Dataset for {Algerian} Sign Language Recognition", journal = j-TALLIP, volume = "22", number = "6", pages = "178:1--178:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3596909", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3596909", abstract = "Sign language recognition (SLR) is a promising research field that aims to blur boundaries between Deaf and hearing people by creating a system that can transcribe signs into a written or vocal language. There is a growing body of literature that \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "178", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Khalid:2023:UDA, author = "Hamza Khalid and Ghulam Murtaza and Qaiser Abbas", title = "Using Data Augmentation and Bidirectional Encoder Representations from Transformers for Improving {Punjabi} Named Entity Recognition", journal = j-TALLIP, volume = "22", number = "6", pages = "179:1--179:??", month = jun, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3595861", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Jul 1 13:42:00 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3595861", abstract = "Named entity recognition (NER) is a task of proper noun identification from natural language text and classification into various types such as location, person, and organization. Due to NER's applications in different natural language processing (NLP) \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.",
  articleno       = "179",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Zhao:2023:SNN,
  author          = "Shuai Zhao and Qing Li and Yuer Yang and Jinming Wen and Weiqi Luo",
  title           = "From Softmax to Nucleusmax: a Novel Sparse Language Model for {Chinese} Radiology Report Summarization",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "6",
  pages           = "180:1--180:??",
  month           = jun,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3596219",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Sat Jul 1 13:42:00 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3596219",
  abstract        = "The Chinese radiology report summarization is a crucial component in smart healthcare that employs language models to summarize key findings in radiology reports and communicate these findings to physicians. However, most language models for radiology \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "180",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Abbache:2023:IAD,
  author          = "Mohamed Abbache and Ahmed Abbache and Jingwen Xu and Farid Meziane and Xianbin Wen",
  title           = "The Impact of {Arabic} Diacritization on Word Embeddings",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "6",
  pages           = "181:1--181:??",
  month           = jun,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3592603",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Sat Jul 1 13:42:00 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3592603",
  abstract        = "Word embedding is used to represent words for text analysis.
It plays an essential role in many Natural Language Processing (NLP) studies and has hugely contributed to the extraordinary developments in the field in the last few years. In Arabic, diacritic \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "181",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Park:2023:RMT,
  author          = "Cheoneum Park and Juae Kim",
  title           = "Robust Multi-task Learning-based {Korean} {POS} Tagging to Overcome Word Spacing Errors",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "6",
  pages           = "182:1--182:??",
  month           = jun,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3591206",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Sat Jul 1 13:42:00 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3591206",
  abstract        = "End-to-end neural network-based approaches have recently demonstrated significant improvements in natural language processing (NLP). However, in the NLP application such as assistant systems, NLP components are still processed to extract results using a \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno       = "182",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Li:2023:MBB,
  author          = "Zezhong Li and Xiao Sun and Fuji Ren and Jianjun Ma and Degen Huang and Piao Shi",
  title           = "Multilingual {BERT}-based Word Alignment By Incorporating Common {Chinese} Characters",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "6",
  pages           = "183:1--183:??",
  month           = jun,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3594634",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Sat Jul 1 13:42:00 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3594634",
  abstract        = "Word alignment is an important task of detecting translation equivalents between a sentence pair. Although word alignment is no longer necessarily needed for neural machine translation, it's still useful in a wealth of applications, e.g., bilingual \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno       = "183",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Khairunnisa:2023:DEM,
  author          = "Siti Oryza Khairunnisa and Zhousi Chen and Mamoru Komachi",
  title           = "Dataset Enhancement and Multilingual Transfer for Named Entity Recognition in the {Indonesian} Language",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "6",
  pages           = "184:1--184:??",
  month           = jun,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3592854",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Sat Jul 1 13:42:00 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3592854",
  abstract        = "Named entity recognition in the Indonesian language has significantly developed in recent years. However, it still lacks standardized publicly available corpora; a small dataset is available but suffers from inconsistent annotations. Therefore, we re-\ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "184",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Droua-Hamdani:2023:MSR,
  author          = "Ghania Droua-Hamdani",
  title           = "{MSA} Speech Rhythm Pattern in a Multilingual Setting",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "185:1--185:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3593295",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3593295",
  abstract        = "This study examines variation in rhythm metrics in a multilingual setting by focusing on between-speaker differences.
The investigation analyzes speech rhythm patterns of segmental \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "185",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Taj:2023:USE,
  author          = "Soonh Taj and Ghulam Mujtaba and Sher Muhammad Daudpota and Muhammad Hussain Mughal",
  title           = "{Urdu} Speech Emotion Recognition: a Systematic Literature Review",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "186:1--186:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3595377",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3595377",
  abstract        = "Research on Speech Emotion Recognition is becoming more mature day by day, and a lot of research is being carried out on Speech Emotion Recognition in resource-rich languages \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "186",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Mosa:2023:ELR,
  author          = "Mohamed Atef Mosa",
  title           = "An Exhaustive Literature Review of {Hadith} Text Mining",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "187:1--187:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3588315",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3588315",
  abstract        = "The Quran and the hadith of the Prophet are the two sources of legislation for Muslims.
Sharia rulings and laws are not only derived from the Quran but also the bulk of them come \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "187",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Khan:2023:ACT,
  author          = "Sulaiman Khan and Shah Nazir and Habib Ullah Khan",
  title           = "Analysis of Cursive Text Recognition Systems: a Systematic Literature Review",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "188:1--188:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3592600",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3592600",
  abstract        = "Regional and cultural diversities around the world have given birth to a large number of writing systems and scripts, which consist of varying character sets. Developing an optimal \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno       = "188",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Gharagozlou:2023:SRE,
  author          = "Hamid Gharagozlou and Javad Mohammadzadeh and Azam Bastanfard and Saeed Shiry Ghidary",
  title           = "Semantic Relation Extraction: a Review of Approaches, Datasets, and Evaluation Methods With Looking at the Methods and Datasets in the {Persian} Language",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "189:1--189:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3592601",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3592601",
  abstract        = "A large volume of unstructured data, especially text data, is generated and exchanged daily. Consequently, the importance of extracting patterns and discovering knowledge \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "189",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Kivaisi:2023:SSD,
  author          = "Alexander R. Kivaisi and Qingjie Zhao and Jimmy T.
Mbelwa",
  title           = "{Swahili} Speech Dataset Development and Improved Pre-training Method for Spoken Digit Recognition",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "190:1--190:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3597494",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3597494",
  abstract        = "Speech dataset is an essential component in building commercial speech applications. However, low-resource languages such as Swahili lack such a resource that is vital for \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "190",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Chakrabarty:2023:LRM,
  author          = "Abhisek Chakrabarty and Raj Dabre and Chenchen Ding and Masao Utiyama and Eiichiro Sumita",
  title           = "Low-resource Multilingual Neural Translation Using Linguistic Feature-based Relevance Mechanisms",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "191:1--191:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3594631",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3594631",
  abstract        = "This article investigates approaches to effectively harness source-side linguistic features for low-resource multilingual neural machine translation (MNMT). Previous works focus on \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno       = "191",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Ding:2023:MMA,
  author          = "Ling Ding and Xiaojun Chen and Jian Wei and Yang Xiang",
  title           = "{MABERT}: Mask-Attention-Based {BERT} for {Chinese} Event Extraction",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "192:1--192:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3597455",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3597455",
  abstract        = "Event extraction is an essential but challenging task in information extraction. This task has considerably benefited from pre-trained language models, such as BERT. \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "192",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Yan:2023:AMT,
  author          = "Yibo Yan and Peng Zhu and Dawei Cheng and Fangzhou Yang and Yifeng Luo",
  title           = "Adversarial Multi-task Learning for Efficient {Chinese} Named Entity Recognition",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "193:1--193:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3603626",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3603626",
  abstract        = "Named entity recognition (NER) is a fundamental task for information extraction applications.
NER is challenging because of semantic ambiguities in academic literature, especially for \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "193",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Mekki:2023:TTA,
  author          = "Asma Mekki and In{\`e}s Zribi and Mariem Ellouze and Lamia Hadrich Belguith",
  title           = "Tokenization of {Tunisian Arabic}: a Comparison between Three Machine Learning Models",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "194:1--194:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3599234",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3599234",
  abstract        = "Tokenization represents the way of segmenting a piece of text into smaller units called tokens. Since Arabic is an agglutinating language by nature, this treatment becomes a \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "194",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Mirishkar:2023:ICC,
  author          = "Ganesh S.
Mirishkar and Vishnu Vidyadhara Raju V and Meher Dinesh Naroju and Sudhamay Maity and Prakash Yalla and Anil Kumar Vuppala",
  title           = "{IIITH-CSTD Corpus}: Crowdsourced Strategies for the Collection of a Large-scale {Telugu} Speech Corpus",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "195:1--195:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3600228",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3600228",
  abstract        = "Due to the lack of a large annotated speech corpus, many low-resource Indian languages struggle to utilize recent advancements in deep neural network architectures for \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "195",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Yue:2023:RMG,
  author          = "Jianyu Yue and Yiwen Sun and Xiaojun Bi and Zheng Chen and Yu Zhang",
  title           = "Retrospective Multi-granularity Fusion Network for {Chinese} Idiom Cloze-style Reading Comprehension",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "196:1--196:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3603370",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3603370",
  abstract        = "Chinese idiom cloze-style reading comprehension task is of great significance for improving the machine's ability to understand Chinese idioms, which is one of the essential \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno       = "196",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Sangsavate:2023:ESL,
  author          = "Suntarin Sangsavate and Sukree Sinthupinyo and Achara Chandrachai",
  title           = "Experiments of Supervised Learning and Semi-Supervised Learning in {Thai} Financial News Sentiment: a Comparative Study",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "197:1--197:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3603499",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3603499",
  abstract        = "Sentiment classification is an instrument of natural language processing tasks in text analysis to measure customer feedback from given documents such as product \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno       = "197",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Kaur:2023:BEA,
  author          = "Harmandeep Kaur and Munish Kumar and Aastha Gupta and Monika Sachdeva and Ajay Mittal and Krishan Kumar",
  title           = "Bagging: an Ensemble Approach for Recognition of Handwritten Place Names in {Gurumukhi} Script",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "198:1--198:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3593024",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3593024",
  abstract        = "In this article, the authors present an effort to recognize handwritten Gurumukhi place names for use in postal automation. Five feature extraction techniques (zoning, \ldots{}).",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno       = "198",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Jin:2023:IAP,
  author          = "Feihu Jin and Jinliang Lu and Jiajun Zhang and Chengqing Zong",
  title           = "Instance-Aware Prompt Learning for Language Understanding and Generation",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "199:1--199:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3604613",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3604613",
  abstract        = "Prompt learning has emerged as a new paradigm for leveraging pre-trained language models (PLMs) and has shown promising results in downstream tasks with only a slight \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "199",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Al-Jarrah:2023:ABS,
  author          = "Ibrahim Al-Jarrah and Ahmad M. Mustafa and Hassan Najadat",
  title           = "Aspect-Based Sentiment Analysis for {Arabic} Food Delivery Reviews",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "200:1--200:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3605146",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3605146",
  abstract        = "Business customers and consumers share their reviews online on social platforms such as Twitter.
Therefore, Twitter data sentiment analysis is extremely useful for both \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "200",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Salloum:2023:NEA,
  author          = "Said Salloum and Tarek Gaber and Sunil Vadera and Khaled Shaalan",
  title           = "A New {English\slash Arabic} Parallel Corpus for Phishing Emails",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "201:1--201:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3606031",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3606031",
  abstract        = "Phishing involves malicious activity whereby phishers, in the disguise of legitimate entities, obtain illegitimate access to the victims' personal and private information, usually \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno       = "201",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Sahu:2023:SCB,
  author          = "Siba Sankar Sahu and Sukomal Pal",
  title           = "A Study on Corpus-based Stopword Lists in {Indian} Language {IR}",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "7",
  pages           = "202:1--202:??",
  month           = jul,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3606262",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3606262",
  abstract        = "We explore and evaluate the effect of different stopword lists (non-corpus-based and corpus-based) in the information retrieval (IR) tasks with different Indian languages such as \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "202",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Cao:2023:ORS,
  author          = "Jihua Cao and Jie Li and Miao Yin and Yunfeng Wang",
  title           = "Online Reviews Sentiment Analysis and Product Feature Improvement with Deep Learning",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "8",
  pages           = "203:1--203:??",
  month           = aug,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3522575",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3522575",
  abstract        = "The text mining of online reviews is currently a popular research direction of e-commerce and is considered the next blue ocean. Online reviews can dig out consumer preferences and \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans.
Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "203",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Mujahid:2023:ACT,
  author          = "Muhammad Mujahid and Khadija Kanwal and Furqan Rustam and Wajdi Aljadani and Imran Ashraf",
  title           = "{Arabic} {ChatGPT} Tweets Classification Using {RoBERTa} and {BERT} Ensemble Model",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "8",
  pages           = "204:1--204:??",
  month           = aug,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3605889",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3605889",
  abstract        = "ChatGPT OpenAI, a large-language chatbot model, has gained a lot of attention due to its popularity and impressive performance in many natural language processing tasks. \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "204",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Al-Omari:2023:SWG,
  author          = "Hani Al-Omari and Rehab Duwairi",
  title           = "{So2al-wa-Gwab}: a New {Arabic} Question-Answering Dataset Trained on Answer Extraction Models",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "8",
  pages           = "205:1--205:??",
  month           = aug,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3605550",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3605550",
  abstract        = "Question answering (QA) is the task of responding to questions posed by users automatically.
A question-answering system is divided into three main components: \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "205",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Zhu:2023:QCE,
  author          = "Wenhao Zhu and Xiaoyu Zhang and Liang Ye and Qiuhong Zhai",
  title           = "Query Context Expansion for Open-Domain Question Answering",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "8",
  pages           = "206:1--206:??",
  month           = aug,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3603498",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3603498",
  abstract        = "Humans are accustomed to autonomously associating prior knowledge with the text in a query when answering questions. However, for machines lacking cognition and \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno       = "206",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{An:2023:PBL,
  author          = "Bo An",
  title           = "Prompt-based for Low-Resource {Tibetan} Text Classification",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "8",
  pages           = "207:1--207:??",
  month           = aug,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3603168",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3603168",
  abstract        = "Text classification is a critical and foundational task in Tibetan natural language processing, it plays a crucial role in various applications, such as sentiment analysis and information \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "207",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Djaidri:2023:CSL,
  author          = "Asma Djaidri and Hassina Aliane and Hamid Azzoune",
  title           = "The Contribution of Selected Linguistic Markers for Unsupervised {Arabic} Verb Sense Disambiguation",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "8",
  pages           = "208:1--208:??",
  month           = aug,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3605777",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3605777",
  abstract        = "Word sense disambiguation (WSD) is the task of automatically determining the meaning of a polysemous word in a specific context. Word sense induction is the unsupervised \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang.
Inf. Process.",
  articleno       = "208",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Lalrempuii:2023:IUN,
  author          = "Candy Lalrempuii and Badal Soni",
  title           = "Investigating Unsupervised Neural Machine Translation for Low-resource Language Pair {English--Mizo} via Lexically Enhanced Pre-trained Language Models",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "8",
  pages           = "209:1--209:??",
  month           = aug,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3609222",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3609222",
  abstract        = "The vast majority of languages in the world at present are considered to be low-resource languages. Since the availability of large parallel data is crucial for the success of most \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal        = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno       = "209",
  fjournal        = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL     = "https://dl.acm.org/loi/tallip",
}

@Article{Fenta:2023:AII,
  author          = "Anduamlak Abebe Fenta and Seffi Gebeyehu",
  title           = "Automatic Idiom Identification Model for {Amharic} Language",
  journal         = j-TALLIP,
  volume          = "22",
  number          = "8",
  pages           = "210:1--210:??",
  month           = aug,
  year            = "2023",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3606864",
  ISSN            = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L          = "2375-4699",
  bibdate         = "Wed Aug 30 05:32:44 MDT 2023",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL             = "https://dl.acm.org/doi/10.1145/3606864",
  abstract        = "Idiomatic expressions are important natural parts of all languages and prominent parts of our daily speech.
Idioms cannot be interpreted from the words that they are formed with \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "210", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Rai:2023:DLB, author = "Pooja Rai and Sanjay Chatterji and Byung-Gyu Kim", title = "Deep Learning-based Sequence Labeling Tools for {Nepali}", journal = j-TALLIP, volume = "22", number = "8", pages = "211:1--211:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3606696", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Aug 30 05:32:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3606696", abstract = "A Part-of-Speech (POS) tagger and Chunker (or shallow parser) are sequence labeling tools, crucial for improving the accuracy of Natural Language Processing (NLP) tasks like \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "211", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Trujillo-Romero:2023:MSL, author = "Felipe Trujillo-Romero and Gibran Garc{\'{\i}}a-Bautista", title = "{Mexican} Sign Language Corpus: Towards an Automatic Translator", journal = j-TALLIP, volume = "22", number = "8", pages = "212:1--212:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3591471", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Aug 30 05:32:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3591471", abstract = "The development of the Sign Language Corpus has been motivated by its great utility and application to various purposes and research areas. However, some countries do not \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "212", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lim:2023:ILM, author = "Ee Suan Lim and Wei Qi Leong and Thanh Ngan Nguyen and Wei Ming Kng and William Chandra Tjhi and Dea Adhista and Ayu Purwarianti", title = "{ICON}: a Linguistically-Motivated Large-Scale Benchmark {Indonesian} Constituency {Treebank}", journal = j-TALLIP, volume = "22", number = "8", pages = "213:1--213:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3609798", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Aug 30 05:32:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3609798", abstract = "Constituency parsing is an important task of informing how words are combined to form sentences. 
While constituency parsing in English has seen significant progress in the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "213", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Osman:2023:EFD, author = "Taha Osman and Hussein Khalil and Mohammed Miltan and Khaled Shaalan and Rowida Alfrjani", title = "Exploiting Functional Discourse Grammar to Enhance Complex {Arabic} Relation Extraction using a Hybrid Semantic Knowledge Base --- Machine Learning Approach", journal = j-TALLIP, volume = "22", number = "8", pages = "214:1--214:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3610581", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Aug 30 05:32:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3610581", abstract = "Relation extraction from unstructured Arabic text is especially challenging due to the Arabic language complex morphology and the variation in word semantics and lexical \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "214", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Song:2023:SSS, author = "Haiyue Song and Raj Dabre and Chenhui Chu and Sadao Kurohashi and Eiichiro Sumita", title = "{SelfSeg}: a Self-supervised Sub-word Segmentation Method for Neural Machine Translation", journal = j-TALLIP, volume = "22", number = "8", pages = "215:1--215:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3610611", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Aug 30 05:32:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3610611", abstract = "Sub-word segmentation is an essential pre-processing step for Neural Machine Translation (NMT). Existing work has shown that neural sub-word segmenters are better than \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "215", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Xie:2023:CGM, author = "Zheyong Xie and Weidong He and Tong Xu and Shiwei Wu and Chen Zhu and Ping Yang and Enhong Chen", title = "Comprehending the Gossips: Meme Explanation in Time-Sync Video Comment via Multimodal Cues", journal = j-TALLIP, volume = "22", number = "8", pages = "216:1--216:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3612920", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Aug 30 05:32:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3612920", abstract = "Recent years have witnessed the booming of online social media platforms with embracing the popular service called ``Time-Sync Comment'', which supports the viewers to share \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "216", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Dahou:2023:DSA, author = "Brahim Dahou and Leila Falek and Mourad Abbas and Slimane Mekaoui and Mohamed Lichouri and Aicha Zitouni", title = "{DZ-SMS}: an Authentic Corpus of {Algerian SMS}", journal = j-TALLIP, volume = "22", number = "8", pages = "217:1--217:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3610522", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Aug 30 05:32:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3610522", abstract = "In this article, a complete methodology of a corpus realization of authentic Short Message Service (SMS) from Algerian dialect and which are transcribed in Latin characters or \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "217", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Thin:2023:SLR, author = "Dang Van Thin and Duong Ngoc Hao and Ngan Luu-Thuy Nguyen", title = "A Systematic Literature Review on {Vietnamese} Aspect-based Sentiment Analysis", journal = j-TALLIP, volume = "22", number = "8", pages = "218:1--218:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3610226", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Aug 30 05:32:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3610226", abstract = "Aspect-based sentiment analysis (ABSA) is one of the principal tasks in the automatic deep understanding of texts, widely applied in a broad range of real-world applications. 
Many \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "218", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Do:2023:SFE, author = "Duc-Hao Do and Thanh-Duc Chau and Thai-Son Tran", title = "Speech Feature Enhancement based on Time-frequency Analysis", journal = j-TALLIP, volume = "22", number = "8", pages = "219:1--219:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3605549", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Aug 30 05:32:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3605549", abstract = "Time-frequency analysis (TFA) is a powerful method to exploit the hidden information of signals, including speech signals. Many techniques in this group were invented \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "219", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Haulai:2023:CME, author = "Thangkhanhau Haulai and Jamal Hussain", title = "Construction of {Mizo:English} Parallel Corpus for Machine Translation", journal = j-TALLIP, volume = "22", number = "8", pages = "220:1--220:??", month = aug, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3610404", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Aug 30 05:32:44 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3610404", abstract = "Parallel corpus is a key component of statistical and Neural Machine Translation (NMT). 
While most research focuses on machine translation, corpus creation studies are limited for \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "220", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Huang:2023:HCC, author = "Lihua Huang and Peng Zheng", title = "Human-Computer Collaborative Visual Design Creation Assisted by Artificial Intelligence", journal = j-TALLIP, volume = "22", number = "9", pages = "221:1--221:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3554735", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Sep 28 05:47:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3554735", abstract = "With the support and promotion of big data and cloud computing, AI has penetrated into every field of people's lives more and more deeply, with its characteristics of sustainable work, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "221", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{L:2023:EST, author = "Anand Babu G. L. 
and Srinivasu Badugu",
  title =        "Extractive Summarization of {Telugu} Text Using
                 Modified Text Rank and Maximum Marginal Relevance",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "9",
  pages =        "222:1--222:??",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3600224",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Sep 28 05:47:03 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3600224",
  abstract =     "With the rapid growth of digital content, there is a
                 need for an automatic text summarizer to provide short
                 text from a long text document. Many research works
                 have been \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "222",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% NOTE: In the next entry, the second author was previously recorded
%%% as "Hirasawa Tosho" (family name first), which BibTeX parses with
%%% "Tosho" as the family name.  It is given here in the given-name-first
%%% order used for the other authors of this entry, so that "Hirasawa"
%%% is the family name for sorting and abbreviation.
%%% NOTE(review): confirm the name order against the article title page.

@Article{Kim:2023:NKN,
  author =       "Hwichan Kim and Tosho Hirasawa and Sangwhan Moon and
                 Naoaki Okazaki and Mamoru Komachi",
  title =        "{North Korean} Neural Machine Translation through
                 {South Korean} Resources",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "9",
  pages =        "223:1--223:??",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3608947",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Sep 28 05:47:03 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3608947",
  abstract =     "South and North Korea both use the Korean language.
                 However, Korean natural language processing (NLP)
                 research has mostly focused on South Korean language.
                 Therefore, \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "223", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ahmat:2023:WXI, author = "Ahtamjan Ahmat and Yating Yang and Bo Ma and Rui Dong and Kaiwen Lu and Lei Wang", title = "{WAD-X}: Improving Zero-shot Cross-lingual Transfer via Adapter-based Word Alignment", journal = j-TALLIP, volume = "22", number = "9", pages = "224:1--224:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3610289", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Sep 28 05:47:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3610289", abstract = "Multilingual pre-trained language models (mPLMs) have achieved remarkable performance on zero-shot cross-lingual transfer learning. However, most mPLMs implicitly \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "224", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2023:ISS, author = "Weizhao Zhang and Hongwu Yang", title = "Improving Sequence-to-sequence {Tibetan} Speech Synthesis with Prosodic Information", journal = j-TALLIP, volume = "22", number = "9", pages = "225:1--225:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3616012", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Sep 28 05:47:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3616012", abstract = "There are about 6,000 languages worldwide, most of which are low-resource languages. 
Although the current speech synthesis (or text-to-speech, TTS) for major languages \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "225", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Cao:2023:CSR, author = "Yidan Cao and Qingshan Wang and Qi Wang and Peng Liu", title = "Can Same-right-and-different-left Gestures Be Recognized with Only Right-hand Signals?", journal = j-TALLIP, volume = "22", number = "9", pages = "226:1--226:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3617370", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Sep 28 05:47:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3617370", abstract = "Sign language serves as a bridge between the hearing-impaired and other people. Existing sensor-based approaches tend to only collect data from the dominant hand. Does \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "226", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mehmood:2023:EML, author = "Faiza Mehmood and Rehab Shahzadi and Hina Ghafoor and Muhammad Nabeel Asim and Muhammad Usman Ghani and Waqar Mahmood and Andreas Dengel", title = "{EnML}: Multi-label Ensemble Learning for {Urdu} Text Classification", journal = j-TALLIP, volume = "22", number = "9", pages = "227:1--227:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3616111", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Sep 28 05:47:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3616111", abstract = "Exponential growth of electronic data requires advanced multi-label classification approaches for the development of natural language processing (NLP) applications such as \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "227", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Al-Ibrahim:2023:DHS, author = "Rogayah M. Al-Ibrahim and Mostafa Z. Ali and Hassan M. Najadat", title = "Detection of Hateful Social Media Content for {Arabic} Language", journal = j-TALLIP, volume = "22", number = "9", pages = "228:1--228:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3592792", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Sep 28 05:47:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3592792", abstract = "Social media is a common medium for expression of views, discussion, sharing of content, and promotion of products and ideas. 
These views are either polite or obscene. The growth of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "228", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chaudhari:2023:MTE, author = "Prasad Chaudhari and Pankaj Nandeshwar and Shubhi Bansal and Nagendra Kumar", title = "{MahaEmoSen}: Towards Emotion-aware Multimodal {Marathi} Sentiment Analysis", journal = j-TALLIP, volume = "22", number = "9", pages = "229:1--229:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3618057", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Sep 28 05:47:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3618057", abstract = "With the advent of the Internet, social media platforms have witnessed an enormous increase in user-generated textual and visual content. Microblogs on platforms such as \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "229", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2023:IGA, author = "Wanting Li and Yiting Chen and Buzhou Tang", title = "Improving Generative Adversarial Network-based Vocoding through Multi-scale Convolution", journal = j-TALLIP, volume = "22", number = "9", pages = "230:1--230:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3610532", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Sep 28 05:47:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3610532", abstract = "Vocoding is a sub-process of text-to-speech task, which aims at generating audios from intermediate representations between text and audio. Several recent works have \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "230", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Gezmu:2023:MBN, author = "Andargachew Mekonnen Gezmu and Andreas N{\"u}rnberger", title = "Morpheme-Based Neural Machine Translation Models for Low-Resource Fusion Languages", journal = j-TALLIP, volume = "22", number = "9", pages = "231:1--231:??", month = sep, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3610773", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Sep 28 05:47:03 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3610773", abstract = "Neural approaches, which are currently state-of-the-art in many areas, have contributed significantly to the exciting advancements in machine translation. 
However, Neural \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "231",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% NOTE: In the next entry, the author list was previously recorded as
%%% "Wang Yonghe and Feilong Bao and Gaunglai Gao".  The first name was
%%% in family-name-first order (so BibTeX took "Yonghe" as the family
%%% name), and "Gaunglai" appears to be a transposition of "Guanglai".
%%% The citation label Yonghe:2023:CSS is deliberately left unchanged so
%%% that existing citations continue to resolve.
%%% NOTE(review): confirm both names against the article title page.

@Article{Yonghe:2023:CSS,
  author =       "Yonghe Wang and Feilong Bao and Guanglai Gao",
  title =        "A Comparative Study on Selecting Acoustic Modeling
                 Units for {WFST}-based {Mongolian} Speech Recognition",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "10",
  pages =        "232:1--232:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3617830",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Fri Nov 3 14:32:07 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3617830",
  abstract =     "Traditional weighted finite-state transducer- (WFST)
                 based Mongolian automatic speech recognition (ASR)
                 systems use phonemes as pronunciation lexicon modeling
                 units. \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "232", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Rayala:2023:SAC, author = "Upendar Rao Rayala and Karthick Seshadri and Nagesh Bhattu Sristy", title = "Sentiment Analysis of Code-Mixed {Telugu--English} Data Leveraging Syllable and Word Embeddings", journal = j-TALLIP, volume = "22", number = "10", pages = "233:1--233:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3620670", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Nov 3 14:32:07 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3620670", abstract = "Learning the inherent meaning of a word in Natural Language Processing (NLP) has motivated researchers to represent a word at various levels of abstraction, namely \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "233", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zoya:2023:AUL, author = "Zoya and Seemab Latif and Rabia Latif and Hammad Majeed and Nor Shahida Mohd Jamail", title = "Assessing {Urdu} Language Processing Tools via Statistical and Outlier Detection Methods on {Urdu} Tweets", journal = j-TALLIP, volume = "22", number = "10", pages = "234:1--234:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3622939", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Nov 3 14:32:07 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3622939", abstract = "Text pre-processing is a crucial step in Natural Language Processing (NLP) applications, particularly for handling informal and noisy content on social media. Word-level \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "234", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Pathak:2023:PST, author = "Dhrubajyoti Pathak and Sukumar Nandi and Priyankoo Sarmah", title = "Part-of-speech Tagger for {Assamese} Using Ensembling Approach", journal = j-TALLIP, volume = "22", number = "10", pages = "235:1--235:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3617653", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Nov 3 14:32:07 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3617653", abstract = "Ensemble system for part-of-speech (POS) tagging is beneficial for many resource-poor languages that do not have enough annotated training data to train Deep Learning \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "235", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhao:2023:DDI, author = "Shu Zhao and Weifeng Liu and Jie Chen and Xiao Sun", title = "{DIEU}: a Dynamic Interaction Emotion Unit for Emotion Recognition in Conversation", journal = j-TALLIP, volume = "22", number = "10", pages = "236:1--236:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3616493", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Nov 3 14:32:07 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3616493", abstract = "Emotion recognition in conversation (ERC) is challenging because the conversation takes place in real time and the speakers interact with each other. 
However, existing methods \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "236", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bensalem:2023:APP, author = "Raja Bensalem and Kais Haddar and Philippe Blache", title = "An {Arabic} Probabilistic Parser Based on a Property Grammar", journal = j-TALLIP, volume = "22", number = "10", pages = "237:1--237:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3612921", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Nov 3 14:32:07 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3612921", abstract = "The specificities of Arabic parsing, such as agglutination, vocalization, and the relatively order-free words in Arabic sentences, remain major issues to consider. To promote \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "237", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zheng:2023:MEM, author = "Cheng Zheng and Haojie Xu and Xiao Sun", title = "{MHG-ERC}: Multi-hypergraph Feature Aggregation Network for Emotion Recognition in Conversations", journal = j-TALLIP, volume = "22", number = "10", pages = "238:1--238:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3622935", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Nov 3 14:32:07 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3622935", abstract = "The modeling of conversational context is an essential step in Emotion Recognition in Conversations (ERC). 
To maintain high performance and a low GPU memory \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "238", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumari:2023:HTS, author = "Namrata Kumari and Pardeep Singh", title = "{Hindi} Text Summarization Using Sequence to Sequence Neural Network", journal = j-TALLIP, volume = "22", number = "10", pages = "239:1--239:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3624013", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Nov 3 14:32:07 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3624013", abstract = "Text summarizing reduces a large block of text data to a precise, short, and intelligible text that conveys the whole meaning of the actual text in a few words while maintaining the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "239", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Duong:2023:FCV, author = "Huong T. Duong and Van H. 
Ho and Phuc Do", title = "Fact-checking {Vietnamese} Information Using {Knowledge Graph}, {Datalog}, and {KG-BERT}", journal = j-TALLIP, volume = "22", number = "10", pages = "240:1--240:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3624557", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Nov 3 14:32:07 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3624557", abstract = "In the era of digital information, ensuring the accuracy and reliability of information is crucial, making fact-checking a vital process. Currently, English fact-checking has thrived due \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "240", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mishra:2023:GGA, author = "Santosh Kumar Mishra and Soham Chakraborty and Sriparna Saha and Pushpak Bhattacharyya", title = "{GAGPT-2}: a Geometric Attention-based {GPT-2} Framework for Image Captioning in {Hindi}", journal = j-TALLIP, volume = "22", number = "10", pages = "241:1--241:??", month = oct, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3622936", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Nov 3 14:32:07 MDT 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3622936", abstract = "Image captioning frameworks usually employ an encoder-decoder paradigm, with the encoder receiving abstract image feature vectors as input and the decoder for language modeling. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "241", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{BenMesmia:2023:SAB, author = "Fatma {Ben Mesmia} and Malek Mouhoub", title = "Semi-Automatic Building and Learning of a Multilingual Ontology", journal = j-TALLIP, volume = "22", number = "11", pages = "242:1--242:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3615864", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3615864", abstract = "Most online platforms, applications, and Websites use a massive amount of heterogeneous evolving data. These data must be structured and normalized before integration to \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "242", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Xie:2023:CED, author = "Dongdong Xie and Fei Li and Bobo Li and Chong Teng and Donghong Ji and Meishan Zhang", title = "{Chinese} Event Discourse Deixis Resolution: Design of the Dataset and Model", journal = j-TALLIP, volume = "22", number = "11", pages = "243:1--243:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3618109", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3618109", abstract = "Anaphora resolution is a traditional task in the natural language processing community, defined as a cohesion phenomenon where one entity points back to a previous entity. 
Event \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "243", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2023:TBM, author = "Yaqi Chen and Wenlin Zhang and Hao Zhang and Dan Qu and Xu-Kui Yang", title = "Task-based Meta Focal Loss for Multilingual Low-resource Speech Recognition", journal = j-TALLIP, volume = "22", number = "11", pages = "244:1--244:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3626187", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3626187", abstract = "Low-resource automatic speech recognition is a challenging task due to a lack of labeled training data. To resolve this issue, multilingual meta-learning learns a better model \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "244", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ghasemi:2023:HDC, author = "Rouzbeh Ghasemi and Saeedeh Momtazi", title = "How a Deep Contextualized Representation and Attention Mechanism Justifies Explainable Cross-Lingual Sentiment Analysis", journal = j-TALLIP, volume = "22", number = "11", pages = "245:1--245:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3626094", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3626094", abstract = "The number of applications in sentiment analysis is growing daily, and research in this field is increasing. Despite the rapid growth of data sources in English, low-resource \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "245", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Haffar:2023:SBL, author = "Nafaa Haffar and Mounir Zrigui", title = "A Synergistic Bidirectional {LSTM} and {$N$}-gram Multi-channel {CNN} Approach Based on {BERT} and {FastText} for {Arabic} Event Identification", journal = j-TALLIP, volume = "22", number = "11", pages = "246:1--246:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3626568", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3626568", abstract = "Event extraction from texts continues to pose a challenge for many NLP systems. 
This article presents a novel neural network architecture that can extract and classify events \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "246", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Saidi:2023:SBC, author = "Rakia Saidi and Fethi Jarray", title = "Stacking of {BERT} and {CNN} Models for {Arabic} Word Sense Disambiguation", journal = j-TALLIP, volume = "22", number = "11", pages = "247:1--247:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3623379", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3623379", abstract = "We propose a new approach for Arabic Word Sense Disambiguation (AWSD) by hybridization of single-layer Convolutional Neural Network (CNN) with contextual representation \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "247", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ma:2023:RST, author = "Tianlong Ma and Xiangcheng Du and Xingjiao Wu and Zhao Zhou and Yingbin Zheng and Cheng Jin", title = "Reading Scene Text with Aggregated Temporal Convolutional Encoder", journal = j-TALLIP, volume = "22", number = "11", pages = "248:1--248:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3625822", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3625822", abstract = "Reading scene text in the natural image is of fundamental importance in many real-world problems. Text recognition has a profound effect on information processing by enabling \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "248", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chang:2023:STB, author = "Yung-Chun Chang and Siu Hin Ng and Jung-Peng Chen and Yu-Chi Liang and Wen-Lian Hsu", title = "Semantic Template-based Convolutional Neural Network for Text Classification", journal = j-TALLIP, volume = "22", number = "11", pages = "249:1--249:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3627820", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3627820", abstract = "We propose a semantic template-based distributed representation for the convolutional neural network called Semantic Template-based Convolutional Neural Network (STCNN) \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "249", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2023:MTP, author = "Da Li and Boqing Zhu and Sen Yang and Kele Xu and Ming Yi and Yukai He and Huaimin Wang", title = "Multi-task Pre-training Language Model for Semantic Network Completion", journal = j-TALLIP, volume = "22", number = "11", pages = "250:1--250:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3627704", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3627704", abstract = "Semantic networks, exemplified by the knowledge graph, serve as a means to represent knowledge by leveraging the structure of a graph. 
While the knowledge graph exhibits \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "250", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2023:FGD, author = "Meishan Zhang and Peiming Guo and Peijie Jiang and Dingkun Long and Yueheng Sun and Guangwei Xu and Pengjun Xie and Min Zhang", title = "Fine-Grained Domain Adaptation for {Chinese} Syntactic Processing", journal = j-TALLIP, volume = "22", number = "11", pages = "251:1--251:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3629519", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3629519", abstract = "Syntactic processing is fundamental to natural language processing. It provides rich and comprehensive syntax information in sentences that could be potentially beneficial for \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "251", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tang:2023:SES, author = "Xiaoyu Tang and Mengyun Zheng and Jiewen Feng and Jiazheng Huang and Yayun Gong", title = "Shortcut Enhanced Syntactic and Semantic Dual-channel Network for Aspect-based Sentiment Analysis", journal = j-TALLIP, volume = "22", number = "11", pages = "252:1--252:??", month = nov, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3629518", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3629518", abstract = "Aspect-based sentiment analysis (ABSA) is a fine-grained task that predicts the sentiment polarity of different aspects in the same sentence. The main challenge is how to \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.",
  articleno =    "252",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Jin:2023:FKP,
  author =       "Weiqiang Jin and Biao Zhao and Yu Zhang and Gege Sun
                 and Hang Yu",
  title =        "{Fintech Key-Phrase}: a New {Chinese} Financial
                 High-Tech Dataset Accelerating Expression-Level
                 Information Retrieval",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "11",
  pages =        "253:1--253:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3627989",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Dec 21 10:38:42 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3627989",
  abstract =     "Expression-level information extraction is a
                 challenging task in natural language processing (NLP),
                 which aims to retrieve crucial semantic information
                 from linguistic documents. \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "253",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

%%% In the next entry both author names end in a bare initial.  Each
%%% initial is braced together with the preceding name so that BibTeX
%%% takes the braced group, not the lone initial, as the family name.
@Article{Prakash:2023:CLS,
  author =       "Jothi {Prakash V} and Arul Antran {Vijay S.}",
  title =        "Cross-lingual Sentiment Analysis of {Tamil} Language
                 Using a Multi-stage Deep Learning Architecture",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "12",
  pages =        "254:1--254:??",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3631391",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Dec 21 10:38:42 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3631391",
  abstract =     "In recent years, sentiment analysis has become a focal
                 point in natural language processing.
Cross-lingual sentiment analysis is a particularly demanding yet essential task that \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "254", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Belay:2023:FRN, author = "Birhanu Hailu Belay and Gebeyehu Belay Gebremeskel and Belete Biazen Bezabih and Seffi Gebeyehu", title = "Factorized Recurrent Neural Network with Attention for Language Identification and Content Detection", journal = j-TALLIP, volume = "22", number = "12", pages = "255:1--255:??", month = dec, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3630607", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3630607", abstract = "Language identification and content detection are essential for ensuring effective digital communication, and content moderation. While extensive research has primarily \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "255", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jia:2023:SAI, author = "Zhaohong Jia and Yunwei Shi and Weifeng Liu and Zhenhua Huang and Xiao Sun", title = "Speaker-Aware Interactive Graph Attention Network for Emotion Recognition in Conversation", journal = j-TALLIP, volume = "22", number = "12", pages = "256:1--256:??", month = dec, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3627806", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3627806", abstract = "Recently, Emotion Recognition in Conversation (ERC) has attracted much attention and has become a hot topic in the field of natural language processing. Conversation is \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "256", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Hamadouche:2023:AAC, author = "Khaoula Hamadouche and Kheira Zineb Bousmaha and Mohamed Abdelwaret Bekkoucha and Lamia Hadrich-Belguith", title = "{AlgBERT}: Automatic Construction of Annotated Corpus for Sentiment Analysis in {Algerian} Dialect", journal = j-TALLIP, volume = "22", number = "12", pages = "257:1--257:??", month = dec, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3632948", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3632948", abstract = "Nowadays, sentiment analysis is one of the most crucial research fields of Natural Language Processing (NLP), and it is widely applied in a variety of applications such as \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "257", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Modak:2023:ALC, author = "Masooda M. 
Modak and Prachi Gharpure and {Sasikumar M}",
  title =        "Adaptive Learning and Correlative Assessment of
                 Differential Usage Patterns for Students
                 with-or-without Learning Disabilities via Learning
                 Analytics",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "12",
  pages =        "258:1--258:??",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3632365",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Dec 21 10:38:42 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3632365",
  abstract =     "Learning Disabilities (LD) can be categorized into
                 logical, analytical, grammatical, vocabulary,
                 sequential, and inference disabilities. Analysis of
                 such disabilities assists students to \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "258",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Li:2023:DBB,
  author =       "Daiyi Li and Li Yan and Zongmin Ma",
  title =        "Dependency-based {BERT} for {Chinese} Event Argument
                 Extraction",
  journal =      j-TALLIP,
  volume =       "22",
  number =       "12",
  pages =        "259:1--259:??",
  month =        dec,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3633306",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Dec 21 10:38:42 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3633306",
  abstract =     "Existing event extraction methods independently
                 identify and classify each argument role separately,
                 ignoring the interdependence between different
                 parameter roles. \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "259", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bensalah:2023:CSD, author = "Nouhaila Bensalah and Habib Ayad and Abdellah Adib and Abdelhamid {Ibn El Farouk}", title = "A Comparative Study of Different Dimensionality Reduction Techniques for {Arabic} Machine Translation", journal = j-TALLIP, volume = "22", number = "12", pages = "260:1--260:??", month = dec, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3634681", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3634681", abstract = "Word embeddings are widely deployed in a tremendous range of fundamental natural language processing applications and are also useful for generating representations \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "260", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Husain:2023:NKA, author = "Fatemah Husain", title = "A Novel Knowledge-augmented Model Customization Approach for {Arabic} Offensive Language Detection", journal = j-TALLIP, volume = "22", number = "12", pages = "261:1--261:??", month = dec, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3634702", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3634702", abstract = "Multiple attempts to develop systems for detecting online Arabic offensive language have been explored in previous studies. 
However, most of these attempts do not consider the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "261", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ganjalipour:2023:NER, author = "Ebrahim Ganjalipour and Amir Hossein Refahi Sheikhani and Sohrab Kordrostami and Ali Asghar Hosseinzadeh", title = "Named Entity Recognition in {Persian} Language based on Self-attention Mechanism with Weighted Relational Position Encoding", journal = j-TALLIP, volume = "22", number = "12", pages = "262:1--262:??", month = dec, year = "2023", CODEN = "????", DOI = "https://doi.org/10.1145/3633513", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Dec 21 10:38:42 MST 2023", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3633513", abstract = "Named-entity Recognition (NER) is challenging for languages with low digital resources. The main difficulties arise from the scarcity of annotated corpora and the consequent \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "262", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ahmed:2024:EIA, author = "Usman Ahmed and Jerry Chun-Wei Lin and Gautam Srivastava", title = "Emotional Intelligence Attention Unsupervised Learning Using Lexicon Analysis for Irony-based Advertising", journal = j-TALLIP, volume = "23", number = "1", pages = "1:1--1:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3580496", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Mar 20 07:56:56 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3580496", abstract = "Social media platforms have made increasing use of irony in recent years. Users can express their ironic thoughts with audio, video, and images attached to text content. When you \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "1", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Madan:2024:CFS, author = "Chetan Madan and Harshita Diddee and Deepika Kumar and Mamta Mittal", title = "{CodeFed}: Federated Speech Recognition for Low-Resource Code-Switching Detection", journal = j-TALLIP, volume = "23", number = "1", pages = "2:1--2:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3571732", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Mar 20 07:56:56 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3571732", abstract = "One common constraint in the practical application of speech recognition is Code Switching. 
The issue of code-switched languages is especially aggravated in the context \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "2", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Alyami:2024:IAS, author = "Sarah Alyami and Hamzah Luqman and Mohammad Hammoudeh", title = "Isolated {Arabic} Sign Language Recognition Using a Transformer-based Model and Landmark Keypoints", journal = j-TALLIP, volume = "23", number = "1", pages = "3:1--3:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3584984", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Mar 20 07:56:56 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3584984", abstract = "Pose-based approaches for sign language recognition provide light-weight and fast models that can be adopted in real-time applications. This article presents a framework for \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "3", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tejaswini:2024:DDS, author = "Vankayala Tejaswini and Korra Sathya Babu and Bibhudatta Sahoo", title = "Depression Detection from Social Media Text Analysis using Natural Language Processing Techniques and Hybrid Deep Learning Model", journal = j-TALLIP, volume = "23", number = "1", pages = "4:1--4:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3569580", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Mar 20 07:56:56 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3569580", abstract = "Depression is a kind of emotion that negatively impacts people's daily lives. The number of people suffering from long-term feelings is increasing every year across the globe. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "4", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumar:2024:SCU, author = "Ankit Kumar and Surbhi Bhatia and Mohammad R. 
Khosravi and Arwa Mashat and Parul Agarwal", title = "Semantic and Context Understanding for Sentiment Analysis in {Hindi} Handwritten Character Recognition Using a Multiresolution Technique", journal = j-TALLIP, volume = "23", number = "1", pages = "5:1--5:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3557895", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Mar 20 07:56:56 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3557895", abstract = "The rapid growth of Web 2.0, which enables people to generate, communicate, and share information, has resulted in an increase in the total number of users. In developing \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "5", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ren:2024:ICC, author = "Tianyu Ren and Dengfeng Yao and Chaoran Yang and Xinchen Kang", title = "The Influence of {Chinese} Characters on {Chinese} Sign Language", journal = j-TALLIP, volume = "23", number = "1", pages = "6:1--6:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3591465", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Mar 20 07:56:56 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3591465", abstract = "Chinese Sign Language (CSL) and Chinese are languages used in the Chinese mainland. As a dominant language, Chinese has great influence on all levels of CSL. CSL, as a visual sign \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "6", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Gambhir:2024:EEM, author = "Pooja Gambhir and Amita Dev and Poonam Bansal and Deepak Kumar Sharma", title = "End-to-end Multi-modal Low-resourced Speech Keywords Recognition Using Sequential {Conv2D} Nets", journal = j-TALLIP, volume = "23", number = "1", pages = "7:1--7:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3606019", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Mar 20 07:56:56 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3606019", abstract = "Advanced Neural Networks are widely used to recognize multi-modal conversational speech with significant improvements in accuracy automatically. Significantly, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "7", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Roy:2024:DEN, author = "Pradeep Kumar Roy", title = "Deep Ensemble Network for Sentiment Analysis in Bi-lingual Low-resource Languages", journal = j-TALLIP, volume = "23", number = "1", pages = "8:1--8:??", month = jan, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3600229", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Mar 20 07:56:56 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3600229", abstract = "Sentiment analysis (SA) is the systematic identification, extraction, quantification, and study of affective states and subjective information using natural language \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Asian Low-Resour. Lang. Inf. Process.", articleno = "8", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", }

@Article{Kumar:2024:NSD,
  author = "Sanjay Kumar",
  title = "Negative Stances Detection from Multilingual Data Streams in
    Low-Resource Languages on Social Media Using {BERT} and {CNN}-Based
    Transfer Learning Model",
  journal = j-TALLIP,
  volume = "23",
  number = "1",
  pages = "9:1--9:??",
  month = jan,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3625821",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:56 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3625821",
  abstract = "Online social media allows users to connect with a large
    number of people across the globe and facilitate the exchange of
    information efficiently. These platforms cater to many of our
    \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "9",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Zheng:2024:UII,
  author = "Jiangbo Zheng and Ying Liang",
  title = "User Interest Identification with Social Media Information
    using Natural Language and Meta-Heuristic Technique",
  journal = j-TALLIP,
  volume = "23",
  number = "1",
  pages = "10:1--10:??",
  month = jan,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3579165",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:56 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3579165",
  abstract = "As the number of Internet users and social networking apps
    has grown in recent years, interest-based recommendation systems have
    been more commonly used in practice. \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "10",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Sharma:2024:EWB,
  author = "Vijay Sharma and Namita Mittal and Ankit Vidyarthi and Deepak
    Gupta",
  title = "Exploring {Web}-Based Translation Resources Applied to
    {Hindi--English} Cross-Lingual Information Retrieval",
  journal = j-TALLIP,
  volume = "23",
  number = "1",
  pages = "11:1--11:??",
  month = jan,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3569010",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:56 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3569010",
  abstract = "Internet users perceive a multilingual web but are
    unfamiliar with it due to communication in their regional language
    called Cross-Lingual Information Retrieval (CLIR). In CLIR, a
    \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "11",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Rezaee:2024:CYU,
  author = "Khosro Rezaee and Hossein Ghayoumi Zadeh and Lianyong Qi and
    Hamidreza Rabiee and Mohammad R. Khosravi",
  title = "Can You Understand Why {I} Am Crying? {A} Decision-making
    System for Classifying Infants' Cry Languages Based on {DeepSVM}
    Model",
  journal = j-TALLIP,
  volume = "23",
  number = "1",
  pages = "12:1--12:??",
  month = jan,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3579032",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:56 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3579032",
  abstract = "Scientific and therapeutic advances in perinatology and
    neonatology have improved the survival prospects of preterm and
    extremely-low-birth-weight infants. Infants' cries \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "12",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Chakraborty:2024:ECH,
  author = "Angana Chakraborty and Subhankar Joardar and Arif Ahmed Sekh",
  title = "Ensemble Classifier for {Hindi} Hostile Content Detection",
  journal = j-TALLIP,
  volume = "23",
  number = "1",
  pages = "13:1--13:??",
  month = jan,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3591353",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:56 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3591353",
  abstract = "Detection of hostile content from social media posts
    (Facebook, Twitter, etc.) is a demanding task in the field of Natural
    Language Processing. The increase of hostile content in \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "13",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Wei:2024:MLT,
  author = "Xiao Wei and Jianbao Huang and Rui Zhao and Hang Yu and Zheng
    Xu",
  title = "Multi-Label Text Classification Model Based on Multi-Level
    Constraint Augmentation and Label Association Attention",
  journal = j-TALLIP,
  volume = "23",
  number = "1",
  pages = "14:1--14:??",
  month = jan,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3586008",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:56 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3586008",
  abstract = "In the multi-label text classification task, a text usually
    corresponds to multiple label categories, and the labels have
    correlation and hierarchical structure. However, when the label
    \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "14",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Bozuyla:2024:SAT,
  author = "Mehmet Bozuyla",
  title = "Sentiment Analysis of {Turkish} Drug Reviews with
    Bidirectional Encoder Representations from Transformers",
  journal = j-TALLIP,
  volume = "23",
  number = "1",
  pages = "15:1--15:??",
  month = jan,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3626523",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:56 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3626523",
  abstract = "Sentiment analysis of user generated product or service
    reviews is significant to enhance quality. Healthcare related
    computational linguistics studies, particularly analysis of drug
    \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "15",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Dash:2024:RMD,
  author = "Deba Prasad Dash and Maheshkumar Kolekar and Chinmay
    Chakraborty and Mohammad R. Khosravi",
  title = "Review of Machine and Deep Learning Techniques in Epileptic
    Seizure Detection using Physiological Signals and Sentiment Analysis",
  journal = j-TALLIP,
  volume = "23",
  number = "1",
  pages = "16:1--16:??",
  month = jan,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3552512",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:56 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3552512",
  abstract = "Epilepsy is one of the significant neurological disorders
    affecting nearly 65 million people worldwide. The repeated seizure is
    characterized as epilepsy. Different algorithms were \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "16",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Jain:2024:OBN,
  author = "Deepak Kumar Jain and Shamimul Qamar and Saurabh Raj Sangwan
    and Weiping Ding and Anand J. Kulkarni",
  title = "Ontology-Based Natural Language Processing for Sentimental
    Knowledge Analysis Using Deep Learning Architectures",
  journal = j-TALLIP,
  volume = "23",
  number = "1",
  pages = "17:1--17:??",
  month = jan,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3624012",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:56 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3624012",
  abstract = "When tested with popular datasets, sentiment categorization
    using deep learning (DL) algorithms will produce positive results.
    Building a corpus on novel themes to train \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "17",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Dave:2024:SRS,
  author = "Nakul R. Dave and Mayuri A. Mehta and Ketan Kotecha",
  title = "A Systematic Review of Stemmers of {Indian} and Non-{Indian}
    Vernacular Languages",
  journal = j-TALLIP,
  volume = "23",
  number = "1",
  pages = "18:1--18:??",
  month = jan,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3604612",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:56 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3604612",
  abstract = "The stemming process is crucial and significant in the
    pre-processing step of natural language processing. The stemmer
    oversees the stemming process. It facilitates the extraction of
    \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "18",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Dalai:2024:DLB,
  author = "Tusarkanta Dalai and Tapas Kumar Mishra and Pankaj K. Sa",
  title = "Deep Learning-based {POS} Tagger and Chunker for {Odia}
    Language Using Pre-trained Transformers",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "19:1--19:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3637877",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3637877",
  abstract = "Developing effective natural language processing (NLP)
    tools for low-resourced languages poses significant challenges. This
    article centers its attention on the task of \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "19",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Man:2024:ESG,
  author = "Zhibo Man and Yujie Zhang and Yu Li and Yuanmeng Chen and
    Yufeng Chen and Jinan Xu",
  title = "An Ensemble Strategy with Gradient Conflict for Multi-Domain
    Neural Machine Translation",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "20:1--20:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3638248",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3638248",
  abstract = "Multi-domain neural machine translation aims to construct a
    unified neural machine translation model to translate sentences
    across various domains. Nevertheless, previous \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "20",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Zheng:2024:HNN,
  author = "Cheng Zheng and Haojie Xu and Xiao Sun",
  title = "Hypergraph Neural Network for Emotion Recognition in
    Conversations",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "21:1--21:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3638760",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3638760",
  abstract = "Modeling conversational context is an essential step for
    emotion recognition in conversations. Existing works still suffer
    from insufficient utilization of local context information \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "21",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Chungnoi:2024:CMS,
  author = "Krittanut Chungnoi and Rachada Kongkachandra and Sarun
    Gulyanon",
  title = "The Computational Method for Supporting {Thai} {VerbNet}
    Construction",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "22:1--22:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3638533",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3638533",
  abstract = "VerbNet is a lexical resource for verbs that has many
    applications in natural language processing tasks, especially ones
    that require information about both the syntactic behavior and
    \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "22",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Sweidan:2024:AFE,
  author = "Asmaa Hashem Sweidan and Nashwa El-Bendary and Esraa Elhariri",
  title = "Autoregressive Feature Extraction with Topic Modeling for
    Aspect-based Sentiment Analysis of {Arabic} as a Low-resource
    Language",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "23:1--23:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3638050",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3638050",
  abstract = "This paper proposes an approach for aspect-based sentiment
    analysis of Arabic social data, especially the considerable text
    corpus generated through communications on X \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "23",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2024:DBM,
  author = "Haixia Wang and Yingyu Mao and Qingran Miao and Qun Xiao and
    Yilong Zhang",
  title = "Dual-Branch Multitask Fusion Network for Offline {Chinese}
    Writer Identification",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "24:1--24:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3638554",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3638554",
  abstract = "Chinese characters are complex and contain discriminative
    information, meaning that their writers have the potential to be
    recognized using less text. In this study, offline Chinese \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "24",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Oriola:2024:IDM,
  author = "Oluwafemi Oriola and Eduan Kotz{\'e}",
  title = "Improving the Detection of Multilingual {South African}
    Abusive Language via Skip-gram Using Joint Multilevel Domain
    Adaptation",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "25:1--25:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3638759",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3638759",
  abstract = "The distinctiveness and sparsity of low-resource
    multilingual South African abusive language necessitate the
    development of a novel solution to automatically detect different
    \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "25",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Jiao:2024:REA,
  author = "Songlin Jiao and Zhenfang Zhu and Jiangtao Qi and Fuyong Xu
    and Hongli Pei and Wenling Wang and Ze Song and Peiyu Liu",
  title = "A Relation Embedding Assistance Networks for Multi-hop
    Question Answering",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "26:1--26:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3635114",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3635114",
  abstract = "Multi-hop Knowledge Graph Question Answering aims at
    finding an entity to answer natural language questions from knowledge
    graphs. When humans perform multi-hop \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "26",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Li:2024:DNN,
  author = "Lingfang Li and Aijun Zhang and Ming-Xing Luo",
  title = "{DSISA}: a New Neural Machine Translation Combining Dependency
    Weight and Neighbors",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "27:1--27:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3638762",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3638762",
  abstract = "Most of the previous neural machine translations (NMT) rely
    on parallel corpus. Integrating explicitly prior syntactic structure
    information can improve the neural machine \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "27",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Lin:2024:LDG,
  author = "Tzu-Mi Lin and Man-Chen Hung and Lung-Hao Lee",
  title = "Leveraging Dual Gloss Encoders in {Chinese} Biomedical Entity
    Linking",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "28:1--28:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3638555",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3638555",
  abstract = "Entity linking is the task of assigning a unique identity
    to named entities mentioned in a text, a sort of word sense
    disambiguation that focuses on automatically determining a \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "28",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Nazih:2024:IGI,
  author = "Waleed Nazih and Amany Fashwan and Amr El-Gendy and Yasser
    Hifny",
  title = "{Ibn-Ginni}: an Improved Morphological Analyzer for {Arabic}",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "29:1--29:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3639050",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3639050",
  abstract = "Arabic is a morphologically rich language, which means that
    the Arabic language has a complicated system of word formation and
    structure. The affixes in the Arabic \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "29",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Bhogayata:2024:MLB,
  author = "Chandrakant K. Bhogayata",
  title = "A Machine Learning-Based Readability Model for {Gujarati}
    Texts",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "30:1--30:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3637826",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3637826",
  abstract = "This study aims to develop a machine learning-based model
    to predict the readability of Gujarati texts. The dataset was 50
    prose passages from Gujarati literature. Fourteen lexical and
    \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "30",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Mi:2024:MGK,
  author = "Chenggang Mi and Shaoliang Xie and Yi Fan",
  title = "Multi-granularity Knowledge Sharing in Low-resource Neural
    Machine Translation",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "31:1--31:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3639930",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3639930",
  abstract = "As the rapid development of deep learning methods, neural
    machine translation (NMT) has attracted more and more attention in
    recent years. However, lack of bilingual \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "31",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Yang:2024:EGK,
  author = "Zhao Yang and Yuanzhe Zhang and Dianbo Sui and Yiming Ju and
    Jun Zhao and Kang Liu",
  title = "Explanation Guided Knowledge Distillation for Pre-trained
    Language Model Compression",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "32:1--32:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3639364",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3639364",
  abstract = "Knowledge distillation is widely used in pre-trained
    language model compression, which can transfer knowledge from a
    cumbersome model to a lightweight one. Though knowledge \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "32",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Baruah:2024:TCR,
  author = "Hemanta Baruah and Sanasam Ranbir Singh and Priyankoo Sarmah",
  title = "Transliteration Characteristics in {Romanized} {Assamese}
    Language Social Media Text and Machine Transliteration",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "33:1--33:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3639565",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3639565",
  abstract = "This article aims to understand different transliteration
    behaviors of Romanized Assamese text on social media. Assamese, a
    language that belongs to the Indo-Aryan language \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "33",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2024:FSI,
  author = "Hao Wang and Hanwen Shi and Jianyong Duan",
  title = "Few-shot Incremental Event Detection",
  journal = j-TALLIP,
  volume = "23",
  number = "2",
  pages = "34:1--34:??",
  month = feb,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3634747",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:57 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3634747",
  abstract = "Event detection tasks can enable the quick detection of
    events from texts and provide powerful support for downstream natural
    language processing tasks. Most such \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "34",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Tohidi:2024:PPA,
  author = "Nasim Tohidi and Chitra Dadkhah and Reza Nouralizadeh Ganji
    and Ehsan Ghaffari Sadr and Hoda Elmi",
  title = "{PAMR}: {Persian} Abstract Meaning Representation Corpus",
  journal = j-TALLIP,
  volume = "23",
  number = "3",
  pages = "35:1--35:??",
  month = mar,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3638288",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:58 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3638288",
  abstract = "One of the most used and well-known semantic representation
    models is Abstract Meaning Representation (AMR). This representation
    has had numerous applications in natural \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "35",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Nasayreh:2024:ASA,
  author = "Ahmad Nasayreh and Rabia Emhamed {Al Mamlook} and Ghassan
    Samara and Hasan Gharaibeh and Mohammad Aljaidi and Dalia Alzu'bi and
    Essam Al-Daoud and Laith Abualigah",
  title = "{Arabic} Sentiment Analysis for {ChatGPT} Using Machine
    Learning Classification Algorithms: a Hyperparameter Optimization
    Technique",
  journal = j-TALLIP,
  volume = "23",
  number = "3",
  pages = "36:1--36:??",
  month = mar,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3638285",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:58 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3638285",
  abstract = "In the realm of ChatGPT's language capabilities, exploring
    Arabic Sentiment Analysis emerges as a crucial research focus. This
    study centers on ChatGPT, a popular machine learning \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "36",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Das:2024:ESI,
  author = "Soumen Das and Saroj Kr. Biswas and Biswajit Purkayastha",
  title = "An Expert System for {Indian} Sign Language Recognition Using
    Spatial Attention-based Feature and Temporal Feature",
  journal = j-TALLIP,
  volume = "23",
  number = "3",
  pages = "37:1--37:??",
  month = mar,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3643824",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:58 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3643824",
  abstract = "Sign Language (SL) is the only means of communication for
    the hearing-impaired people. Normal people have difficulty
    understanding SL, resulting in a communication barrier \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "37",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{V:2024:MNA,
  author = "Lakshmi Lalitha V. and Dinesh Kumar Anguraj",
  title = "Modeling a Novel Approach for Emotion Recognition Using
    Learning and Natural Language Processing",
  journal = j-TALLIP,
  volume = "23",
  number = "3",
  pages = "38:1--38:??",
  month = mar,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3641851",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:58 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3641851",
  abstract = "Various facts, including politics, entertainment, industry,
    and research fields, are connected to analyzing the audience's
    emotions. Sentiment Analysis (SA) is a Natural Language \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "38",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Li:2024:IBB,
  author = "Qing Li and Weibin Wan and Yuming Zhao and Xiaoyan Jiang",
  title = "Improved {BIO}-Based {Chinese} Automatic Abstract-Generation
    Model",
  journal = j-TALLIP,
  volume = "23",
  number = "3",
  pages = "39:1--39:??",
  month = mar,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3643695",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:58 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3643695",
  abstract = "With its unique information-filtering function, text
    summarization technology has become a significant aspect of search
    engines and question-and-answer systems. \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "39",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{N:2024:MVI,
  author = "Thenmoezhi N. and Perumal B. and Lakshmi A.",
  title = "Multi-view Image Fusion Using Ensemble Deep Learning Algorithm
    For {MRI} And {CT} Images",
  journal = j-TALLIP,
  volume = "23",
  number = "3",
  pages = "40:1--40:??",
  month = mar,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3640811",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:58 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3640811",
  abstract = "Medical image fusions are crucial elements in image-based
    health care diagnostics or therapies and generic applications of
    computer visions. However, the majority of existing \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "40",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2024:STS,
  author = "Jie Liu and Yaguang Li and Shizhu He and Shun Wu and Kang Liu
    and Shenping Liu and Jiong Wang and Qing Zhang",
  title = "{Seq2Set2Seq}: a Two-stage Disentangled Method for Reply
    Keyword Generation in Social Media",
  journal = j-TALLIP,
  volume = "23",
  number = "3",
  pages = "41:1--41:??",
  month = mar,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3644074",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:58 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3644074",
  abstract = "Social media produces large amounts of content every day.
    How to predict the potential influences of the contents from a social
    reply feedback perspective is a key issue that \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "41",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Banik:2024:IRA,
  author = "Debajyoty Banik and Rahul Paul and Rajkumar Singh Rathore and
    Rutvij H. Jhaveri",
  title = "Improved Regression Analysis with Ensemble Pipeline Approach
    for Applications across Multiple Domains",
  journal = j-TALLIP,
  volume = "23",
  number = "3",
  pages = "42:1--42:??",
  month = mar,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3645110",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:58 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3645110",
  abstract = "In this research, we introduce two new machine learning
    regression methods: the Ensemble Average and the Pipelined Model.
    These methods aim to enhance traditional regression \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "42",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Rafi:2024:SSC,
  author = "Shaik Rafi and Ranjita Das",
  title = "{SCT}: Summary Caption Technique for Retrieving Relevant
    Images in Alignment with Multimodal Abstractive Summary",
  journal = j-TALLIP,
  volume = "23",
  number = "3",
  pages = "43:1--43:??",
  month = mar,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3645029",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:58 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3645029",
  abstract = "This work proposes an efficient Summary Caption Technique
    that considers the multimodal summary and image captions as input to
    retrieve the correspondence images from the \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "43",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Devi:2024:DIM,
  author = "Thiyam Susma Devi and Pradip K. Das",
  title = "Disambiguation of Isolated {Manipuri} Tonal Contrast Word
    Pairs Using Acoustic Features",
  journal = j-TALLIP,
  volume = "23",
  number = "3",
  pages = "44:1--44:??",
  month = mar,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3643830",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:58 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3643830",
  abstract = "Manipuri is a low-resource, Tibeto-Burman tonal language
    spoken mainly in Manipur, a northeastern state of India. Tone
    identification is crucial to speech comprehension for \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "44",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Bi:2024:CCL,
  author = "Zhen Bi and Jing Chen and Yinuo Jiang and Feiyu Xiong and Wei
    Guo and Huajun Chen and Ningyu Zhang",
  title = "{CodeKGC}: Code Language Model for Generative Knowledge Graph
    Construction",
  journal = j-TALLIP,
  volume = "23",
  number = "3",
  pages = "45:1--45:??",
  month = mar,
  year = "2024",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3641850",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed Mar 20 07:56:58 MDT 2024",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3641850",
  abstract = "Current generative knowledge graph construction approaches
    usually fail to capture structural knowledge by simply flattening
    natural language into serialized texts or a specification \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "45",
  fjournal = "ACM Transactions on Asian and Low-Resource Language
    Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Dargan:2024:GCS, author = "Shaveta Dargan and Munish Kumar", title = "Gender Classification System Based on the Behavioral Biometric Modality: Application of Handwritten Text", journal = j-TALLIP, volume = "23", number = "3", pages = "46:1--46:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3626236", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Mar 20 07:56:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3626236", abstract = "Forensic Science is a branch of science that deals with the discovery, examination, and analysis of strong elements or evidence involved in the criminal justice system.
It involves the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "46", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Garg:2024:TMH, author = "Muskan Garg", title = "Towards Mental Health Analysis in Social Media for Low-resourced Languages", journal = j-TALLIP, volume = "23", number = "3", pages = "47:1--47:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638761", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Mar 20 07:56:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3638761", abstract = "The surge in internet use for expression of personal thoughts and beliefs has made it increasingly feasible for the social Natural Language Processing (NLP) research \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "47", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kanwal:2024:SSE, author = "Safia Kanwal and Muhammad Kamran Malik and Zubair Nawaz and Khawar Mehmood", title = "{SEEUNRS}: Semantically Enriched Entity-Based {Urdu} News Recommendation System", journal = j-TALLIP, volume = "23", number = "3", pages = "48:1--48:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3639049", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Mar 20 07:56:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3639049", abstract = "The advancement in the production, distribution, and consumption of news has fostered easy access to the news with fair challenges. 
The main challenge is to present the right news \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "48", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mahata:2024:CBM, author = "Sainik Kumar Mahata and Dipankar Das and Sivaji Bandyopadhyay", title = "Consensus-Based Machine Translation for Code-Mixed Texts", journal = j-TALLIP, volume = "23", number = "3", pages = "49:1--49:??", month = mar, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3628427", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Mar 20 07:56:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3628427", abstract = "Multilingualism in India is widespread due to its long history of foreign acquaintances. This leads to the presence of an audience familiar with conversing using more than one \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "49", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yu:2024:SAM, author = "Yang Yu and Dong Qiu and Huanyu Wan", title = "Sentiment Analysis Method of Epidemic-related Microblog Based on Hesitation Theory", journal = j-TALLIP, volume = "23", number = "4", pages = "50:1--50:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3648360", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Apr 26 08:51:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3648360", abstract = "The COVID-19 pandemic in 2020 brought an unprecedented global crisis. 
After two years of control efforts, life gradually returned to the pre-pandemic state, but localized outbreaks \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "50", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yuan:2024:CLK, author = "Xiaowei Yuan and Kang Liu and Yequan Wang", title = "Contrastive Language-knowledge Graph Pre-training", journal = j-TALLIP, volume = "23", number = "4", pages = "51:1--51:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3644820", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Apr 26 08:51:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3644820", abstract = "Recent years have witnessed a surge of academic interest in knowledge-enhanced pre-trained language models (PLMs) that incorporate factual knowledge to enhance \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "51", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ahmed:2024:EUN, author = "Anil Ahmed and Degen Huang and Syed Yasser Arafat and Imran Hameed", title = "Enriching {Urdu} {NER} with {BERT} Embedding, Data Augmentation, and Hybrid Encoder-{CNN} Architecture", journal = j-TALLIP, volume = "23", number = "4", pages = "52:1--52:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3648362", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Apr 26 08:51:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3648362", abstract = "Named Entity Recognition (NER) is an indispensable component of Natural Language Processing (NLP), which aims to identify and classify entities within text data. While \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "52", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2024:BAA, author = "Jiuyi Li and Junpeng Liu and Jianjun Ma and Wei Yang and Degen Huang", title = "Boundary-Aware Abstractive Summarization with Entity-Augmented Attention for Enhancing Faithfulness", journal = j-TALLIP, volume = "23", number = "4", pages = "53:1--53:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3641278", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Apr 26 08:51:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3641278", abstract = "With the successful application of deep learning, document summarization systems can produce more readable results. 
However, abstractive summarization still suffers from \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "53", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{San:2024:SEL, author = "Mya Ei San and Sasiporn Usanavasin and Ye Kyaw Thu and Manabu Okumura", title = "A Study for Enhancing Low-resource {Thai--Myanmar--English} Neural Machine Translation", journal = j-TALLIP, volume = "23", number = "4", pages = "54:1--54:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3645111", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Apr 26 08:51:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3645111", abstract = "Several methodologies have recently been proposed to enhance the performance of low-resource Neural Machine Translation (NMT). However, these techniques have yet to \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "54", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tayir:2024:UMM, author = "Turghun Tayir and Lin Li", title = "Unsupervised Multimodal Machine Translation for Low-resource Distant Language Pairs", journal = j-TALLIP, volume = "23", number = "4", pages = "55:1--55:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3652161", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Apr 26 08:51:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3652161", abstract = "Unsupervised machine translation (UMT) has recently attracted more attention from researchers, enabling models to translate when languages lack parallel corpora. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "55", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lu:2024:MQS, author = "Wenpeng Lu and Sibo Wei and Xueping Peng and Yi-Fei Wang and Usman Naseem and Shoujin Wang", title = "Medical Question Summarization with Entity-driven Contrastive Learning", journal = j-TALLIP, volume = "23", number = "4", pages = "56:1--56:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3652160", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Apr 26 08:51:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3652160", abstract = "By summarizing longer consumer health questions into shorter and essential ones, medical question-answering systems can more accurately understand consumer \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Asian Low-Resour. Lang. Inf. Process.", articleno = "56", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Akanova:2024:NSS, author = "Akerke Akanova and Aisulu Ismailova and Zhanar Oralbekova and Zhanat Kenzhebayeva and Galiya Anarbekova", title = "Neurocomputer System of Semantic Analysis of the Text in the {Kazakh} Language", journal = j-TALLIP, volume = "23", number = "4", pages = "57:1--57:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3652159", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Apr 26 08:51:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3652159", abstract = "The purpose of the study is to solve an extreme mathematical problem---semantic analysis of natural language, which can be used in various fields, including marketing research, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "57", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zaman:2024:LBL, author = "Farooq Zaman and Onaiza Maqbool and Jaweria Kanwal", title = "Leveraging Bidirectionl {LSTM} with {CRFs} for {Pashto} Tagging", journal = j-TALLIP, volume = "23", number = "4", pages = "58:1--58:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3649456", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Apr 26 08:51:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3649456", abstract = "Part-of-speech tagging plays a vital role in text processing and natural language understanding.
Very few attempts have been made in the past for tagging Pashto Part-of-Speech. In \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "58", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhao:2024:CDA, author = "Chuanjun Zhao and Meiling Wu and Xinyi Yang and Xuzhuang Sun and Suge Wang and Deyu Li", title = "Cross-Domain Aspect-Based Sentiment Classification with a Pre-Training and Fine-Tuning Strategy for Low-Resource Domains", journal = j-TALLIP, volume = "23", number = "4", pages = "59:1--59:??", month = apr, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653299", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Apr 26 08:51:14 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3653299", abstract = "Aspect-based sentiment classification (ABSC) is a crucial sub-task of fine-grained sentiment analysis, which aims to predict the sentiment polarity of the given aspects in a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "59", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Verdu:2024:MLR, author = "Elena Verdu and Yuri Vanessa Nieto and Nasir Saleem", title = "Multi-Lingual Representation of Natural Language Processing for Low Resource {Asian} Language Processing Systems", journal = j-TALLIP, volume = "23", number = "5", pages = "60:1--60:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3603169", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3603169", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "60", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wei:2024:MTS, author = "Kaiwen Wei and Li Jin and Zequn Zhang and Zhi Guo and Xiaoyu Li and Qing Liu and Weimiao Feng", title = "More Than Syntaxes: Investigating Semantics to Zero-shot Cross-lingual Relation Extraction and Event Argument Role Labelling", journal = j-TALLIP, volume = "23", number = "5", pages = "61:1--61:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3582261", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3582261", abstract = "Syntactic dependency structures are commonly utilized as language-agnostic features to solve the word order difference issues in zero-shot cross-lingual relation and event \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "61", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yu:2024:RUS, author = "Shanshan Yu", title = "A Research on University Students' Behavioral Intention to Use New-generation Information Technology in Intelligent Foreign Language Learning", journal = j-TALLIP, volume = "23", number = "5", pages = "62:1--62:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3563774", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3563774", abstract = "A better understanding of how advancement in science and technology affect students' learning behavior in an academic setting can help all educators in higher education. With the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "62", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jain:2024:KBD, author = "Deepak Kumar Jain and Yamila Garc{\'\i}a-Mart{\'\i}nez Eyre and Akshi Kumar and Brij B.
Gupta and Ketan Kotecha", title = "Knowledge-based Data Processing for Multilingual Natural Language Analysis", journal = j-TALLIP, volume = "23", number = "5", pages = "63:1--63:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3583686", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3583686", abstract = "Natural Language Processing (NLP) aids the empowerment of intelligent machines by enhancing human language understanding for linguistic-based human-computer \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "63", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ahmed:2024:ATL, author = "Usman Ahmed and Jerry Chun-Wei Lin and Vicente Garcia Diaz", title = "Automatically Temporal Labeled Data Generation Using Positional Lexicon Expansion for Focus Time Estimation of News Articles", journal = j-TALLIP, volume = "23", number = "5", pages = "64:1--64:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3568164", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3568164", abstract = "Many facts change over time, which is a fundamental aspect of our physical environment. In the case of pandemic articles, the user is not interested in the creation date of the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "64", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Das:2024:MNM, author = "Sudhansu Bala Das and Divyajyoti Panda and Tapas Kumar Mishra and Bidyut Kr. Patra and Asif Ekbal", title = "Multilingual Neural Machine Translation for {Indic} to {Indic} Languages", journal = j-TALLIP, volume = "23", number = "5", pages = "65:1--65:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3652026", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3652026", abstract = "The method of translation from one language to another without human intervention is known as Machine Translation (MT). Multilingual neural machine translation (MNMT) is a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "65", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Vu:2024:NPG, author = "Dinh Anh Vu and Quang Nhat Minh Pham and Giang Son Tran", title = "A Novel Pretrained General-purpose Vision Language Model for the {Vietnamese} Language", journal = j-TALLIP, volume = "23", number = "5", pages = "66:1--66:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3654796", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3654796", abstract = "Lying in the cross-section of computer vision and natural language processing, vision language models are capable of processing images and text at once. 
These models are helpful in \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "66", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sarwar:2024:CLB, author = "Raheem Sarwar and Maneesha Perera and Pin Shen Teh and Raheel Nawaz and Muhammad Umair Hassan", title = "Crossing Linguistic Barriers: Authorship Attribution in {Sinhala} Texts", journal = j-TALLIP, volume = "23", number = "5", pages = "67:1--67:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3655620", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3655620", abstract = "Authorship attribution involves determining the original author of an anonymous text from a pool of potential authors. The author attribution task has applications in several domains, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "67", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{C:2024:FRN, author = "Vinotheni C. and S. 
Lakshmana Pandian", title = "Fast Recurrent Neural Network with Bi-{LSTM} for Handwritten {Tamil} Text Segmentation in {NLP}", journal = j-TALLIP, volume = "23", number = "5", pages = "68:1--68:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3643808", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3643808", abstract = "Tamil text segmentation is a long-standing test in language comprehension that entails separating a record into adjacent pieces based on its semantic design. Each segment is \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "68", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Rong:2024:MMM, author = "Huan Rong and Zhongfeng Chen and Zhenyu Lu and Fan Xu and Victor S. Sheng", title = "Multization: Multi-Modal Summarization Enhanced by Multi-Contextually Relevant and Irrelevant Attention Alignment", journal = j-TALLIP, volume = "23", number = "5", pages = "69:1--69:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3651983", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3651983", abstract = "This article focuses on the task of Multi-Modal Summarization with Multi-Modal Output for China JD.COM e-commerce product description containing both source text and source \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "69", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Baishya:2024:PST, author = "Diganta Baishya and Rupam Baruah", title = "Part-of-speech Tagging for Low-resource Languages: Activation Function for Deep Learning Network to Work with Minimal Training Data", journal = j-TALLIP, volume = "23", number = "5", pages = "70:1--70:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3655023", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3655023", abstract = "Numerous natural language processing (NLP) applications exist today, especially for the most commonly spoken languages such as English, Chinese, and Spanish. Popular \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "70", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Munivel:2024:PBA, author = "Monisha Munivel and V. S. Felix Enigo", title = "Performance of Binarization Algorithms on {Tamizhi} Inscription Images: an Analysis", journal = j-TALLIP, volume = "23", number = "5", pages = "71:1--71:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3656583", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3656583", abstract = "Binarization of Tamizhi (Tamil-Brahmi) inscription images are highly challenging, as it is captured from very old stone inscriptions that exists around 3rd century BCE in India. 
\ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "71", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Hou:2024:KEP, author = "Wenlong Hou and Weidong Zhao and Xianhui Liu and Wenyan Guo", title = "Knowledge-Enriched Prompt for Low-Resource Named Entity Recognition", journal = j-TALLIP, volume = "23", number = "5", pages = "72:1--72:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3659948", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3659948", abstract = "Named Entity Recognition (NER) in low-resource settings aims to identify and categorize entities in a sentence with limited labeled data. Although prompt-based methods have \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "72", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Luo:2024:CJN, author = "Queenie Luo and Yung-Sung Chuang", title = "Cleansing Jewel: a Neural Spelling Correction Model Built On {Google} {OCR}-ed {Tibetan} Manuscripts", journal = j-TALLIP, volume = "23", number = "5", pages = "73:1--73:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3654811", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3654811", abstract = "Scholars in the humanities heavily rely on ancient manuscripts to study history, religion, and socio-political structures of the past. Significant efforts have been devoted \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "73", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wu:2024:SCL, author = "Liang Wu and Fangfang Zhang and Chao Cheng and Shinan Song", title = "Supervised Contrast Learning Text Classification Model Based on Data Quality Augmentation", journal = j-TALLIP, volume = "23", number = "5", pages = "74:1--74:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3653300", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3653300", abstract = "Token-level data augmentation generates text samples by modifying the words of the sentences. 
However, data that are not easily classified can negatively affect the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "74", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2024:MTM, author = "Yi Ting Chen and Wanting Li and Buzhou Tang", title = "{MRMI-TTS}: Multi-Reference Audios and Mutual Information Driven Zero-Shot Voice Cloning", journal = j-TALLIP, volume = "23", number = "5", pages = "75:1--75:??", month = may, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3649501", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu May 16 10:08:43 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3649501", abstract = "Voice cloning in text-to-speech (TTS) is the process of replicating the voice of a target speaker with limited data. Among various voice cloning techniques, this article focuses \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "75", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2024:ADG, author = "Chunmei Wang and Yuan Luo and Chunli Meng and Feiniu Yuan", title = "An adaptive dual graph convolution fusion network for aspect-based sentiment analysis", journal = j-TALLIP, volume = "23", number = "6", pages = "76:1--76:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3659579", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3659579", abstract = "Aspect-based Sentiment Analysis (ABSA), also known as fine-grained sentiment analysis, aims to predict the sentiment polarity of specific aspect words in the sentence. Some \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "76", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Setiawan:2024:SIR, author = "Irwan Setiawan and Hung-Yu Kao", title = "{SUSTEM}: an Improved Rule-based {Sundanese} Stemmer", journal = j-TALLIP, volume = "23", number = "6", pages = "77:1--77:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3656342", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3656342", abstract = "Current Sundanese stemmers either ignore reduplication words or define rules to handle only affixes. There is a significant amount of reduplication words in the Sundanese \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "77", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Huang:2024:LDS, author = "Shuanghong Huang and Chong Feng and Ge Shi and Zhengjun Li and Xuan Zhao and Xinyan Li and Xiaomei Wang", title = "Learning Domain Specific Sub-layer Latent Variable for Multi-Domain Adaptation Neural Machine Translation", journal = j-TALLIP, volume = "23", number = "6", pages = "78:1--78:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3661305", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3661305", abstract = "Domain adaptation proves to be an effective solution for addressing inadequate translation performance within specific domains. However, the straightforward approach of mixing \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "78", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bekarystankyzy:2024:IEE, author = "Akbayan Bekarystankyzy and Orken Mamyrbayev and Tolganay Anarbekova", title = "Integrated End-to-End Automatic Speech Recognition for Languages for Agglutinative Languages", journal = j-TALLIP, volume = "23", number = "6", pages = "79:1--79:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3663568", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3663568", abstract = "The relevance of the problem of automatic speech recognition lies in the lack of research for low-resource languages, stemming from limited training data and the necessity for new \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "79", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2024:NMT, author = "Jinyi Zhang and Ke Su and Haowei Li and Jiannan Mao and Ye Tian and Feng Wen and Chong Guo and Tadahiro Matsumoto", title = "Neural Machine Translation for Low-Resource Languages from a {Chinese}-centric Perspective: a Survey", journal = j-TALLIP, volume = "23", number = "6", pages = "80:1--80:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3665244", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3665244", abstract = "Machine translation---the automatic transformation of one natural language (source language) into another (target language) through computational \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "80", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhong:2024:RIA, author = "Zhengwu Zhong", title = "Research on the Implementation of Advertising Design Teaching Based on {Unity$3$D} Development Platform and {Web$3$D} Technology", journal = j-TALLIP, volume = "23", number = "6", pages = "81:1--81:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3595294", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3595294", abstract = "In this work, the Unity3D development platform and Web3D technology are integrated into the teaching method of advertising design to get rid of the issues due to lack of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "81", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yin:2024:AII, author = "Jie Yin", title = "Application of Intelligent Image Recognition and Digital Media Art in the Inheritance of Black Pottery Intangible Cultural Heritage", journal = j-TALLIP, volume = "23", number = "6", pages = "82:1--82:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3597430", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3597430", abstract = "With the development of science and technology, intelligent image recognition and digital media art are gradually applied to the inheritance of intangible cultural heritage. 
In the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "82", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Gao:2024:IMN, author = "Yuan Gao and Yani Wu and Jun Qian", title = "Intelligent Multimedia Network Security and {PBL} Teaching Mode in the Basic Course Teaching of College Design Major", journal = j-TALLIP, volume = "23", number = "6", pages = "83:1--83:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3597429", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3597429", abstract = "At this stage, there are problems of disconnection between theoretical knowledge and practice and lack of coherence and progression in the teaching of basic courses for design \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "83", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2024:SAC, author = "Jie Wang and Jiangjun Yuan and Weinan Liu", title = "Sentiment Analysis and Corpus: Cognitive Perspective and Overhead-accuracy Tradeoff", journal = j-TALLIP, volume = "23", number = "6", pages = "84:1--84:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3594537", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3594537", abstract = "Human logical thinking is in the form of natural language. 
With the development of computer science techniques, it becomes easier and more convenient for natural language \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "84", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yang:2024:APE, author = "Lili Yang and Yue Zhang", title = "Application of Psychological Effects in Intelligent Digital {English} Teaching Based on Multimodal Low Resource Language Information Processing from a Psychological Perspective", journal = j-TALLIP, volume = "23", number = "6", pages = "85:1--85:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3599726", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3599726", abstract = "With the development of information technology, intelligent digital education is gradually emerging. At the same time, with the closer economic and trade relations between \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "85", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2024:VSI, author = "Yifang Wang", title = "Virtual Sound Image Reconstruction Method for Multi-objective Optimization of Folk Music Based on Evolutionary Algorithm", journal = j-TALLIP, volume = "23", number = "6", pages = "86:1--86:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3604614", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3604614", abstract = "At present, the exchanges in various fields such as culture, economy, and politics are becoming more and more close in the world. From the perspective of the relationship between \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "86", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tu:2024:COE, author = "Haihua Tu", title = "Collaborative Optimization of {English} Online Teaching Informatization Based on Intelligent Multimedia Image Technology", journal = j-TALLIP, volume = "23", number = "6", pages = "87:1--87:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3599725", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3599725", abstract = "With the rapid development of computer technology and the deepening of educational concepts, the traditional English teaching methods have also changed. As a new \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Asian Low-Resour. Lang. Inf. Process.", articleno = "87", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2024:EAE, author = "Tianhua Li and Shaowei Qu", title = "Effectiveness Analysis of Entrepreneurial Legal Risk Prevention Based on Multi-Modal Deep Learning Model", journal = j-TALLIP, volume = "23", number = "6", pages = "88:1--88:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3622937", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3622937", abstract = "With the emergence of the upsurge of entrepreneurship, entrepreneurs are increasingly concerned about legal risks. In the process of entrepreneurship, legal risk is the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "88", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2024:RRM, author = "Huaben Wang and Jixueyang Tang", title = "Research on Recognition Method of Social Robot Based on {T-A-GCNIIT} in the {Metaverse}", journal = j-TALLIP, volume = "23", number = "6", pages = "89:1--89:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3624014", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3624014", abstract = "Social robots are used in intelligent customer service, intelligent chat, intelligent shopping guides, and more because of emotion recognition studies in cognitive psychology. 
\ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "89", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhong:2024:RPA, author = "Zhengwu Zhong", title = "Research on Product Advertising Design Combining Feature Extraction Technology and {Web$3$D} Technology", journal = j-TALLIP, volume = "23", number = "6", pages = "90:1--90:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3608948", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3608948", abstract = "This work, built on the Unity3D development platform, presents a way for merging feature extraction technology and Web3D technology into advertising design to effectively \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "90", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Huang:2024:MLN, author = "Yong Huang and Lu Zhang", title = "Machine Learning and Natural Language Processing Algorithms in the Remote Mobile Medical Diagnosis System of {Internet} Hospitals", journal = j-TALLIP, volume = "23", number = "6", pages = "91:1--91:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3632172", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3632172", abstract = "In order to alleviate the contradiction between supply and demand of professional pharmacists, integrate medical resources, and ensure the safety of patients' medication, the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "91", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Fu:2024:IVL, author = "Liwei Fu and Qiang Li", title = "Investigation of Visual Language Landscape of Tourist Attractions from Multimodal Perspective", journal = j-TALLIP, volume = "23", number = "6", pages = "92:1--92:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3638049", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3638049", abstract = "With the development of economic globalization, the tourism industry has been welcomed by the public. 
The visual language landscape of tourist attractions can not only assist tourists \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "92", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Han:2024:CPT, author = "Jing Han", title = "Construction of Practical Teaching Mode of Law Course Based on Multi-Mode and Low-Resource Language Learning", journal = j-TALLIP, volume = "23", number = "6", pages = "93:1--93:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3622938", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3622938", abstract = "China's legal profession is a new profession emerging in the process of China's rule of law. Its development and refinement are increasing in accordance with the rapid social, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "93", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Cao:2024:AIT, author = "Yejun Cao and Xiwen Yu and Fengling Jiang", title = "Application of {$3$D} Image Technology in Rural Planning", journal = j-TALLIP, volume = "23", number = "6", pages = "94:1--94:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3628448", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3628448", abstract = "The well-being of villages and villagers is directly related to the development of urban-rural relations. Rural development is an important part of poverty alleviation, as well as the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "94", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Feng:2024:IDM, author = "Qiao Feng and Tian Huang", title = "An Interaction-Design Method Based upon a Modified Algorithm of {Newton's Second Law of Motion}", journal = j-TALLIP, volume = "23", number = "6", pages = "95:1--95:??", month = jun, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3657634", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Jun 26 10:46:58 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3657634", abstract = "Newton's Second Law of Motion algorithm is crucial to interactive visual effects and interactive behavior in interface design. Designers can only utilize simple \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "95", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sun:2024:TBQ, author = "Runxin Sun and Shizhu He and Jun Zhao and Kang Liu", title = "Towards Better Quantity Representations for Solving Math Word Problems", journal = j-TALLIP, volume = "23", number = "7", pages = "96:1--96:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3665644", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3665644", abstract = "Solving a math word problem requires selecting quantities in it and performing appropriate arithmetic operations to obtain the answer. For deep learning-based methods, it is vital \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "96", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Qiu:2024:QSA, author = "Bing Qiu and Jiahao Huo", title = "Quantitative Stylistic Analysis of Middle {Chinese} Texts Based on the Dissimilarity of Evolutive Core Word Usage", journal = j-TALLIP, volume = "23", number = "7", pages = "97:1--97:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3665794", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3665794", abstract = "Stylistic analysis enables open-ended and exploratory observation of languages. 
To fill the gap in the quantitative analysis of the stylistic systems of Middle Chinese, we \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "97", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumar:2024:HHD, author = "Akshi Kumar and Abhishek Mallik and Sanjay Kumar", title = "{HumourHindiNet}: Humour detection in {Hindi} web series using word embedding and convolutional neural network", journal = j-TALLIP, volume = "23", number = "7", pages = "98:1--98:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3661306", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3661306", abstract = "Humour is a crucial aspect of human speech, and it is, therefore, imperative to create a system that can offer such detection. While data regarding humour in English speech is \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "98", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lalramhluna:2024:MMB, author = "Robert Lalramhluna and Sandeep Dash and Partha Pakray", title = "{MizBERT}: a {Mizo BERT} Model", journal = j-TALLIP, volume = "23", number = "7", pages = "99:1--99:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3666003", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3666003", abstract = "This research investigates the utilization of pre-trained BERT transformers within the context of the Mizo language. BERT, an abbreviation for Bidirectional Encoder Representations \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "99", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2024:DLR, author = "Xiaoyi Wang and Jie Liu and Jiong Wang and Jianyong Duan and Guixia Guan and Qing Zhang and Jianshe Zhou", title = "Document-Level Relation Extraction Based on Machine Reading Comprehension and Hybrid Pointer-sequence Labeling", journal = j-TALLIP, volume = "23", number = "7", pages = "100:1--100:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3666042", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3666042", abstract = "Document-level relational extraction requires reading, memorization, and reasoning to discover relevant factual information in multiple sentences.
It is difficult for the current \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "100", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Xu:2024:SSC, author = "Yangyang Xu and Zhuoer Zhao and Xiao Sun", title = "{SCBG}: Semantic-Constrained Bidirectional Generation for Emotional Support Conversation", journal = j-TALLIP, volume = "23", number = "7", pages = "101:1--101:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3666090", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3666090", abstract = "The Emotional Support Conversation (ESC) task aims to deliver consolation, encouragement, and advice to individuals undergoing emotional distress, thereby assisting them in \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "101", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Guo:2024:XPW, author = "Shih-Wei Guo and Yao-Chung Fan", title = "{X-Phishing-Writer}: a Framework for Cross-lingual Phishing E-mail Generation", journal = j-TALLIP, volume = "23", number = "7", pages = "102:1--102:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3670402", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3670402", abstract = "Cybercrime is projected to cause annual business losses of \$10.5 trillion by 2025, a significant concern given that a majority of security breaches are due to human errors \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "102", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Pahari:2024:SWY, author = "Niraj Pahari and Kazutaka Shimada", title = "Share What You Already Know: Cross-Language-Script Transfer and Alignment for Sentiment Detection in Code-Mixed Data", journal = j-TALLIP, volume = "23", number = "7", pages = "103:1--103:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3661307", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3661307", abstract = "Code-switching entails mixing multiple languages. It is an increasingly occurring phenomenon in social media texts. 
Usually, code-mixed texts are written in a single \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "103", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Aithal:2024:KLD, author = "Shreya R. Aithal and Muralikrishna Sn and Raghavendra Ganiga and Ashwath Rao B. and Govardhan Hegde K.", title = "{KannadaLex}: a lexical database with psycholinguistic information", journal = j-TALLIP, volume = "23", number = "7", pages = "104:1--104:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3670688", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3670688", abstract = "Databases containing lexical properties are of primary importance to psycholinguistic research and speech-language therapy. Several lexical databases for different languages have \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "104", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Salhab:2024:AAS, author = "Mahmoud Salhab and Haidar Harmanani", title = "{AraSpot}: {Arabic} Spoken Command Spotting", journal = j-TALLIP, volume = "23", number = "7", pages = "105:1--105:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3674968", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3674968", abstract = "Spoken keyword spotting is the task of identifying a keyword in an audio stream and is widely used in smart devices at the edge to activate voice assistants and perform \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "105", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2024:ECE, author = "Fei Li and Kaifang Deng and Yiwen Mo and Yuanze Ji and Chong Teng and Donghong Ji", title = "Enhancing {Chinese} Event Extraction with Event Trigger Structures", journal = j-TALLIP, volume = "23", number = "7", pages = "106:1--106:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3663567", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3663567", abstract = "The dependency syntactic structure is widely used in event extraction. However, the dependency structure reflecting syntactic features is essentially different from \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "106", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sidhoum:2024:SMH, author = "Abdellah Hamouda Sidhoum and Mhamed Mataoui and Faouzi Sebbak and Adil Imad Eddine Hosni and Kamel Smaili", title = "Scoring Multi-hop Question Decomposition Using Masked Language Models", journal = j-TALLIP, volume = "23", number = "7", pages = "107:1--107:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3665140", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3665140", abstract = "Question answering (QA) is a sub-field of Natural Language Processing (NLP) that focuses on developing systems capable of answering natural language queries. Within this \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "107", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bousmaha:2024:AAS, author = "Kheira Zineb Bousmaha and Khaoula Hamadouche and Hadjer Djouabi and Lamia Hadrich-Belguith", title = "Automatic {Algerian} Sarcasm Detection from Texts and Images", journal = j-TALLIP, volume = "23", number = "7", pages = "108:1--108:??", month = jul, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3670403", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Aug 17 07:29:09 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3670403", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.",
  articleno =    "108",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{He:2024:ENS,
  author =       "Guijiao He and Yunfeng Zhou and Yaodong Zheng",
  title =        "Evaluation on Network Social Media Named Entity
                 Recognition Model Based on Active Learning",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "109:1--109:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3600055",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3600055",
  abstract =     "The medical security privacy and named entity
                 recognition (NER) technology under the blockchain
                 technology has been a hot topic in all walks of life.
                 As a typical \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "109",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Dong:2024:NLP,
  author =       "Jun Dong",
  title =        "Natural Language Processing Pretraining Language Model
                 for Computer Intelligent Recognition Technology",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "110:1--110:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3605210",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3605210",
  abstract =     "Computer intelligent recognition technology refers to
                 the use of computer vision, Natural Language Processing
                 (NLP), machine learning and other technologies to
                 enable \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour.
Lang. Inf. Process.",
  articleno =    "110",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Zeng:2024:MMC,
  author =       "Zhifa Zeng and Yuhang Li",
  title =        "Multi-modal {Chinese} Text Emotion Metaphor
                 Computation Based on Mutual Information and Information
                 Entropy",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "111:1--111:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3605211",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3605211",
  abstract =     "Metaphor is to express another thing through one
                 thing, it is not only a rhetorical means, but also
                 embodies a kind of analogical cognition and way of
                 thinking of people. In recent \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "111",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Duan:2024:ISM,
  author =       "Xingyu Duan and Chun-Nan Chen and Mohammad
                 Shokouhifar",
  title =        "Impacts of Social Media Advertising on Purchase
                 Intention and Customer Loyalty in E-Commerce Systems",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "112:1--112:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3613448",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3613448",
  abstract =     "The emergence of new technologies has had a noteworthy
                 impact on communication systems, leading to the
                 importance of conducting research in this area due to
                 the significant \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "112",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Perti:2024:CHD,
  author =       "Ashwin Perti and Amit Sinha and Ankit Vidyarthi",
  title =        "Cognitive Hybrid Deep Learning-based Multi-modal
                 Sentiment Analysis for Online Product Reviews",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "113:1--113:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3615356",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3615356",
  abstract =     "Recently the field of sentiment analysis has gained a
                 lot of attraction in literature. The idea that a
                 machine can dynamically spot the text's sentiments is
                 fascinating.
In this paper, \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "113",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Hamza:2024:MRH,
  author =       "Ameer Hamza and Abdul Rehman Javed and Farkhund Iqbal
                 and Amanullah Yasin and Gautam Srivastava and Dawid
                 Po{\l}ap and Thippa Reddy Gadekallu and Zunera Jalil",
  title =        "Multimodal Religiously Hateful Social Media Memes
                 Classification Based on Textual and Image Data",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "114:1--114:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3623396",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3623396",
  abstract =     "Multimodal hateful social media meme detection is an
                 important and challenging problem in the
                 vision-language domain. Recent studies show high
                 accuracy for such multimodal \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "114",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Yu:2024:TCB,
  author =       "Haitao Yu and Feng Xiong and Zuhui Chen",
  title =        "Text Classification Based on Natural Language
                 Processing and Machine Learning in Multi-Label Corpus",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "115:1--115:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3617831",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3617831",
  abstract =     "The rapid development of the Internet has led to a
                 geometric expansion of text information resources
                 online. Among them, corpus, as the basic data source of
                 natural language \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "115",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2024:LMM,
  author =       "Jinhui Liu and Feng Zhang",
  title =        "Language Model Method for Collocation Rules of Parts
                 of Speech in Machine Translation System",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "116:1--116:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3625095",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3625095",
  abstract =     "With the development of the times, modern society has
                 now entered the Internet of Things (IoT) information
                 age and Machine Translation (MT) plays an important
                 role in \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans.
Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "116",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kumar:2024:HPD,
  author =       "Akshi Kumar and Dipika Jain and Rohit Beniwal",
  title =        "{HindiPersonalityNet}: Personality Detection in
                 {Hindi} Conversational Data Using Deep Learning with
                 Static Embedding",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "117:1--117:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3625228",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3625228",
  abstract =     "Personality detection along with other behavioral and
                 cognitive assessment can essentially explain why people
                 act the way they do and can be useful to various online
                 applications such as \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "117",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kumar:2024:HDR,
  author =       "Suresh Kumar and Jyoti Prakash Singh and Surya Kant
                 and Neha Jain",
  title =        "A Hybrid Deep Ranking Weighted Multi-Hashing
                 Recommender System",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "118:1--118:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3626195",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3626195",
  abstract =     "In countries where there is a low availability of
                 resources for language, businesses face the challenge
                 of overcoming language barriers to reach their
                 customers. One possible \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "118",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Manimaran:2024:EAI,
  author =       "A. Manimaran and Mohammad Haider Syed and M. Siva
                 Kumar and S.
Selvanayaki and Gurram Sunitha and Asmita Manna",
  title =        "Enhancing {Asian} Indigenous Language Processing
                 through Deep Learning-based Handwriting Recognition and
                 Optimization Techniques",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "119:1--119:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3632173",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3632173",
  abstract =     "Asian indigenous language or autochthonous language is
                 a language which is native to a region and spoken by
                 indigenous people in Asia. This language is a
                 linguistically different \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "119",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Ahmad:2024:ECD,
  author =       "Pir Noman Ahmad and Yuanchao Liu and Inam Ullah and
                 Mohammad Shabaz",
  title =        "Enhancing Coherence and Diversity in Multi-class
                 Slogan Generation Systems",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "120:1--120:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3637551",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3637551",
  abstract =     "Many problems related to natural language processing
                 are solved by neural networks and big data. Researchers
                 have previously focused on single-task supervised goals
                 with \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "120",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kusal:2024:UPA,
  author =       "Sheetal D. Kusal and Shruti G. Patil and Jyoti
                 Choudrie and Ketan V. Kotecha",
  title =        "Understanding the Performance of {AI} Algorithms in
                 Text-Based Emotion Detection for Conversational
                 Agents",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "121:1--121:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3643133",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3643133",
  abstract =     "Current industry trends demand automation in every
                 aspect, where machines could replace humans. Recent
                 advancements in conversational agents have grabbed a
                 lot of attention \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "121",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Ai:2024:EAI,
  author =       "Qinghua Ai and Qingyan Ai and Jun Wang",
  title =        "Exploration on Advanced Intelligent Algorithms of
                 Artificial Intelligence for Verb Recognition in Machine
                 Translation",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "122:1--122:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3649891",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3649891",
  abstract =     "This article aimed to address the problems of word
                 order confusion, context dependency, and ambiguity in
                 traditional machine translation (MT) methods for verb
                 recognition. By \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "122",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Su:2024:NLP,
  author =       "Yawen Su",
  title =        "Natural Language Processing System for Text
                 Classification Corpus Based on Machine Learning",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "123:1--123:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3648361",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3648361",
  abstract =     "A classification system for hazardous materials in air
                 traffic control was investigated using the Human
                 Factors Analysis and Classification System (HFACS)
                 framework and \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans.
Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "123",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Naosekpam:2024:HST,
  author =       "Veronica Naosekpam and Nilkanta Sahu",
  title =        "A Hybrid Scene Text Script Identification Network for
                 Regional {Indian} Languages",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "124:1--124:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3649439",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3649439",
  abstract =     "In this work, we introduce WAFFNet, an
                 attention-centric feature fusion architecture tailored
                 for word-level multi-lingual scene text script
                 identification. Motivated by the \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "124",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kaur:2024:HEP,
  author =       "Ravleen Kaur and M. P. S. Bhatia and Akshi Kumar",
  title =        "Am {I} Hurt?: {Evaluating} Psychological Pain
                 Detection in {Hindi} Text Using Transformer-based
                 Models",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "125:1--125:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3650206",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3650206",
  abstract =     "The automated evaluation of pain is critical for
                 developing effective pain management approaches that
                 seek to alleviate pain while preserving patients'
                 functioning.
\ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "125",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Tai:2024:TAM,
  author =       "Yu Tai and Hongwei Yang and Hui He and Xinglong Wu and
                 Yuanming Shao and Weizhe Zhang and Arun Kumar
                 Sangaiah",
  title =        "Topic-aware Masked Attentive Network for Information
                 Cascade Prediction",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "126:1--126:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3653449",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3653449",
  abstract =     "Predicting information cascades holds significant
                 practical implications, including applications in
                 public opinion analysis, rumor control, and product
                 recommendation. Existing \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "126",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kumar:2024:HDB,
  author =       "Ashwini Kumar and Santosh Kumar and Kalpdrum Passi and
                 Aniket Mahanti",
  title =        "A Hybrid Deep {BiLSTM-CNN} for Hate Speech Detection
                 in Multi-social Media",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "127:1--127:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3657635",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3657635",
  abstract =     "Nowadays, means of communication among people have
                 changed due to advancements in information technology
                 and the rise of online multi-social media. Many people
                 \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "127",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Banik:2024:BIP,
  author =       "Debajyoty Banik and Saneyika Das and Sheshikala Martha
                 and Achyut Shankar",
  title =        "{BERT}-Inspired Progressive Stacking to Enhance
                 Spelling Correction in {Bengali} Text",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "8",
  pages =        "128:1--128:??",
  month =        aug,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3669941",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Sat Aug 17 07:29:10 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3669941",
  abstract =     "Common spelling checks in the current digital era have
                 trouble reading languages such as Bengali, which employ
                 English letters differently.
In response, we have created a better \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "128",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Ma:2024:ALR,
  author =       "Jiushun Ma and Yuxin Huang and Linqin Wang and Xiang
                 Huang and Hao Peng and Zhengtao Yu and Philip Yu",
  title =        "Augmenting Low-Resource Cross-Lingual Summarization
                 with Progression-Grounded Training and Prompting",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "9",
  pages =        "129:1--129:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3675167",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 24 06:26:43 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3675167",
  abstract =     "Cross-lingual summarization (CLS), generating
                 summaries in one language from source documents in
                 another language, offers invaluable assistance in
                 enabling global access to \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "129",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Inaba:2024:TAT,
  author =       "Michimasa Inaba and Yuya Chiba and Zhiyang Qi and
                 Ryuichiro Higashinaka and Kazunori Komatani and Yusuke
                 Miyao and Takayuki Nagai",
  title =        "Travel Agency Task Dialogue Corpus: a Multimodal
                 Dataset with Age-Diverse Speakers",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "9",
  pages =        "130:1--130:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3675166",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 24 06:26:43 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3675166",
  abstract =     "When individuals communicate, they use different
                 vocabularies, speaking speeds, facial expressions, and
                 gestural languages, depending on those with whom they
                 are speaking. \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "130",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Mishra:2024:MTL,
  author =       "Pruthwik Mishra and Vandan Mujadia and Dipti Misra
                 Sharma",
  title =        "Multi Task Learning Based Shallow Parsing for {Indian}
                 Languages",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "9",
  pages =        "131:1--131:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3664620",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 24 06:26:43 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3664620",
  abstract =     "Shallow Parsing is an important step for many Natural
                 Language Processing tasks.
Although shallow parsing has a rich history for resource rich
                 languages, it is not the case for \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "131",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Pham:2024:TVQ,
  author =       "Quoc-Hung Pham and Huu-Loi Le and Minh Dang Nhat and
                 Khang Tran T. and Manh Tran-Tien and Viet-Hung Dang and
                 Huy-The Vu and Minh-Tien Nguyen and Xuan-Hieu Phan",
  title =        "Towards {Vietnamese} Question and Answer Generation:
                 an Empirical Study",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "9",
  pages =        "132:1--132:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3675781",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 24 06:26:43 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3675781",
  abstract =     "Question-answer generation (QAG) is a challenging task
                 that generates both questions and answers from a given
                 input paragraph context. The QAG task has recently
                 \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "132",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Zhao:2024:CCN,
  author =       "Shu Zhao and Zhuoer Zhao and Yangyang Xu and Xiao
                 Sun",
  title =        "{CoMix}: Confronting with Noisy Label Learning with
                 Co-training Strategies on Textual Mislabeling",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "9",
  pages =        "133:1--133:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3678175",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 24 06:26:43 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3678175",
  abstract =     "The existence of noisy labels is inevitable in
                 real-world large-scale corpora. As deep neural networks
                 are notably vulnerable to overfitting on noisy samples,
                 this highlights the \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "133",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Kaur:2024:AII,
  author =       "Manpreet Kaur and Munish Saini",
  title =        "Artificial Intelligence Inspired Method for
                 Cross-Lingual Cyberhate Detection from Low Resource
                 Languages",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "9",
  pages =        "134:1--134:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3677176",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 24 06:26:43 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3677176",
  abstract =     "The appearance of inflammatory language on social
                 media by college or university students is quite
                 prevalent, inspiring platforms to engage in community
                 safety mechanisms.
\ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "134",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Alotaibi:2024:TCD,
  author =       "Fahd Saleh Alotaibi and Khaled Hamed Alyoubi and Ajay
                 Mittal and Vishal Gupta and Navdeep Kaur",
  title =        "{TinyCheXReport}: Compressed Deep Neural Network for
                 Chest {X}-ray Report Generation",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "9",
  pages =        "135:1--135:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3676166",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 24 06:26:43 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3676166",
  abstract =     "Increase in Chest X-ray (CXR) imaging tests has
                 burdened radiologists, thereby posing significant
                 challenges in writing radiological reports on time.
                 Although several deep learning-based \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "135",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Chowdhury:2024:AET,
  author =       "Priyanjana Chowdhury and Nabanika Sarkar and
                 Sanghamitra Nath and Utpal Sharma",
  title =        "Analyzing the Effects of Transcription Errors on
                 Summary Generation of {Bengali} Spoken Documents",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "9",
  pages =        "136:1--136:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3678005",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 24 06:26:43 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3678005",
  abstract =     "Automatic speech recognition (ASR) has become an
                 indispensable part of the AI domain, with various
                 speech technologies reliant on it. The quality of
                 speech recognition depends \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "136",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Lu:2024:OUS,
  author =       "Kexin Lu and Zhihua Huang and Mingming Yin and Ke
                 Chen",
  title =        "Optimizing {Uyghur} Speech Synthesis by Combining
                 Pretrained Cross-Lingual Model",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "9",
  pages =        "137:1--137:??",
  month =        sep,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3675397",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Tue Sep 24 06:26:43 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3675397",
  abstract =     "End-to-end speech synthesis methodologies have
                 exhibited considerable advancements for languages with
                 abundant corpus resources.
Nevertheless, such achievements are \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno =    "137",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Pham:2024:XMB,
  author =       "Khang Pham and Long Nguyen and Dien Dinh",
  title =        "{XLIT}: a Method to Bridge Task Discrepancy in Machine
                 Translation Pre-training",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "10",
  pages =        "138:1--138:??",
  month =        oct,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3689630",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Oct 31 10:36:28 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3689630",
  abstract =     "Transfer learning from pre-trained language models to
                 encoder-decoder translation models faces a challenge
                 due to the mismatch between the tasks of pre-training
                 and \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno =    "138",
  fjournal =     "ACM Transactions on Asian and Low-Resource Language
                 Information Processing (TALLIP)",
  journal-URL =  "https://dl.acm.org/loi/tallip",
}

@Article{Torjmen:2024:TTD,
  author =       "Roua Torjmen and Kais Haddar",
  title =        "Translation from {Tunisian} Dialect to {Modern
                 Standard Arabic}: Exploring Finite-State Transducers
                 and Sequence-to-Sequence Transformer Approaches",
  journal =      j-TALLIP,
  volume =       "23",
  number =       "10",
  pages =        "139:1--139:??",
  month =        oct,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3681788",
  ISSN =         "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L =       "2375-4699",
  bibdate =      "Thu Oct 31 10:36:28 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3681788",
  abstract =     "Translation from the mother tongue, including the
                 Tunisian dialect, to modern standard Arabic is a highly
                 significant field in natural language processing due to
                 its wide range of \ldots{}.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "139", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Waghmare:2024:EBB, author = "Prachi Pramod Waghmare and Ashwini Mangesh Deshpande", title = "Enhanced {BERT}-based Multi-Head Self-Attention Transformer for Transformation of {Marathi} Text to {Marathi} Sign Language Gloss", journal = j-TALLIP, volume = "23", number = "10", pages = "140:1--140:??", month = oct, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687304", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Oct 31 10:36:28 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3687304", abstract = "One recent advancement in the field of machine learning is the translation of text into sign language gloss, which is a form of natural language for the deaf community. The \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "140", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Munaf:2024:LRS, author = "Mubashir Munaf and Hammad Afzal and Khawir Mahmood and Naima Iltaf", title = "Low Resource Summarization using Pre-trained Language Models", journal = j-TALLIP, volume = "23", number = "10", pages = "141:1--141:??", month = oct, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3675780", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Oct 31 10:36:28 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3675780", abstract = "With the advent of Deep Learning-based Artificial Neural Network models, Natural Language Processing (NLP) has witnessed significant improvements in textual \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "141", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Madan:2024:CBM, author = "Anjum Madan and Devender Kumar", title = "{CNN}-Based Models for Emotion and Sentiment Analysis Using Speech Data", journal = j-TALLIP, volume = "23", number = "10", pages = "142:1--142:??", month = oct, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687303", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Oct 31 10:36:28 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3687303", abstract = "The study aims to present an in-depth Sentiment Analysis (SA) grounded by the presence of emotions in the speech signals. Nowadays, all kinds of web-based applications \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "142", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhao:2024:TPM, author = "Jinghua Zhao and Xiting Lyu and Haiying Rong and Jiale Zhao", title = "{TRGCN}: a Prediction Model for Information Diffusion Based on Transformer and Relational Graph Convolutional Network", journal = j-TALLIP, volume = "23", number = "10", pages = "143:1--143:??", month = oct, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3672074", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Oct 31 10:36:28 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3672074", abstract = "In order to capture and integrate the structural features and temporal features contained in social graph and diffusion cascade more effectively, an information diffusion \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "143", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yang:2024:KIP, author = "Songhua Yang and Chenghao Zhang and Chenyuan He and Hongfei Xu and Hongying Zan and Yuxiang Jia", title = "Knowledge-injected Prompt Learning for {Chinese} Biomedical Entity Normalization", journal = j-TALLIP, volume = "23", number = "10", pages = "144:1--144:??", month = oct, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3689629", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Oct 31 10:36:28 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3689629", abstract = "The Biomedical Entity Normalization (BEN) task aims to align raw, unstructured medical entities to standard entities, thus promoting data coherence and facilitating better \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "144", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Gogoi:2024:CLR, author = "Parismita Gogoi and Priyankoo Sarmah and S. R. M. 
Prasanna", title = "Cross-linguistic rhythm analysis of {Mising} and {Assamese}", journal = j-TALLIP, volume = "23", number = "10", pages = "145:1--145:??", month = oct, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3694785", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Oct 31 10:36:28 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3694785", abstract = "The objective of the current study is to explore a quantitative frequency domain technique to evaluate rhythm in spontaneous speech data of 19 native speakers of Mising and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "145", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zafar:2024:TBT, author = "Amna Zafar and Muhammad Wasim and Shaista Zulfiqar and Talha Waheed and AbuBakar Siddique", title = "Transformer-Based Topic Modeling for {Urdu} Translations of the {Holy Quran}", journal = j-TALLIP, volume = "23", number = "10", pages = "146:1--146:??", month = oct, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3694967", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Oct 31 10:36:28 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3694967", abstract = "Topic modeling enables the discovery of concealed themes and patterns in extensive text collections. It facilitates a thorough examination of the messages present in religious \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "146", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tao:2024:SDL, author = "Zekun Tao and Changjian Wang and Zhiliang Tian and Kele Xu and Yong Guo and Shanshan Li and Yanru Bai and Da Xie", title = "{SIAT}: Document-level Event Extraction via Spatiality-Augmented Interaction Model with Adaptive Thresholding", journal = j-TALLIP, volume = "23", number = "10", pages = "147:1--147:??", month = oct, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3698261", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Thu Oct 31 10:36:28 MDT 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3698261", abstract = "Document-level event extraction endeavors to automatically extract structural events from a given document. Many existing approaches focus on modeling entity interactions and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "147", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lin:2024:ISI, author = "Chi Lin and Chang Wu Yu and Ning Wang", title = "Introduction to the Special Issue on Cognitive-Inspired Multimedia Information Processing and Applications for Low-Resource Languages", journal = j-TALLIP, volume = "23", number = "11", pages = "148:1--148:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3676150", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3676150", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "148", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wu:2024:FFP, author = "Yirui Wu and Lilai Zhang and Hao Li and Yunfei Zhang and Shaohua Wan", title = "Feature Fusion Pyramid Network for End-to-End Scene Text Detection", journal = j-TALLIP, volume = "23", number = "11", pages = "149:1--149:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3582003", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3582003", abstract = "How to properly involve text characteristics like multi-scale, arbitrary direction, length aspect ratio, into detection network design has become a hot topic in computer vision. Feature \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "149", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Rutherford:2024:ECB, author = "Attapol Rutherford and Pawitsapak Akarajaradwong", title = "Exploring the Correlation between Emojis and Mood Expression in {Thai} {Twitter} Discourse", journal = j-TALLIP, volume = "23", number = "11", pages = "150:1--150:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3680543", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3680543", abstract = "Mood, a long-lasting affective state detached from specific stimuli, plays an important role in behavior. 
Although sentiment analysis and emotion classification have garnered \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "150", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Singh:2024:CRC, author = "Niraj Singh and Komal Naaz and Raj Aryan", title = "Can Rhyme Consistency Score be used as a Feature in Stylistics? {A} Statistical Endeavour with {Hindi} Poetry", journal = j-TALLIP, volume = "23", number = "11", pages = "151:1--151:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3681789", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3681789", abstract = "Stylistics is the study and analysis of linguistic style using the tools of language. It is an investigation of language elements employed in a spoken or written piece under \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "151", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Cui:2024:DNN, author = "Xingyu Cui and Yong Li and Lili Xu", title = "Deep Neural Network with a Characteristic Analysis for Seal Stroke Recognition", journal = j-TALLIP, volume = "23", number = "11", pages = "152:1--152:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3676883", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3676883", abstract = "Seal characters are derived from ancient Chinese pictographs, naturally inheriting pictographic characteristics and complex structures. As the essential components of seal \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "152", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lv:2024:ITW, author = "Hui Lv and Hao Lv and Liu Yang and Jun Shen and La Duo and Yan Li and Qingguo Zhou and Binbin Yong", title = "Improved {Tibetan} Word Vectors Models Based on Position Information Fusion", journal = j-TALLIP, volume = "23", number = "11", pages = "153:1--153:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3681787", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3681787", abstract = "Tibetan language processing is crucial for preserving its rich cultural heritage and reducing communication barriers between different languages. 
However, as a low-resource \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "153", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Li:2024:ELR, author = "Jiajia Li and Ping Wang and Zuchao Li and Kevin Parnow and Hai Zhao and Weiping Ding", title = "Enhancing Lyrics Rewriting with Weak Supervision from Grammatical Error Correction Pre-training and Reference Knowledge Fusion", journal = j-TALLIP, volume = "23", number = "11", pages = "154:1--154:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687126", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3687126", abstract = "Lyric rewriting involves taking the original lyrics of a song and creatively rephrasing them while preserving their core meaning and emotional essence. Sequence-to-sequence \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "154", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Badri:2024:AHS, author = "Nabil Badri and Ferihane Kboubi and Anja Habacha Chaibi", title = "Abusive and Hate speech Classification in {Arabic} Text Using Pre-trained Language Models and Data Augmentation", journal = j-TALLIP, volume = "23", number = "11", pages = "155:1--155:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3679049", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3679049", abstract = "Hateful content on social media is a worldwide problem that adversely affects not just the targeted individuals but also anyone whose content is accessible. The majority of \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "155", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sharma:2024:MFM, author = "Richa Sharma and Arti Arya", title = "{MMHFND}: Fusing Modalities for Multimodal Multiclass {Hindi} Fake News Detection via Contrastive Learning", journal = j-TALLIP, volume = "23", number = "11", pages = "156:1--156:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3686797", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3686797", abstract = "Multimodal content contains more deception than unimodal information, causing significant social and economic impacts. 
Current techniques often focus on a single modality, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "156", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Singh:2024:LVB, author = "Shashvat Singh and Kumkum Kumari and Ankita Vaish", title = "Learning and Vision-based approach for Human fall detection and classification in naturally occurring scenes using video data", journal = j-TALLIP, volume = "23", number = "11", pages = "157:1--157:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3687125", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3687125", abstract = "The advancement of medicine presents challenges for modern cultures, especially with unpredictable elderly falling incidents anywhere due to serious health issues. Delayed \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "157", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2024:UDA, author = "Xi Wang and Ruoqing Zhao and Jing Li and Piji Li", title = "An Unsupervised Domain-Adaptive Framework for {Chinese} Spelling Checking", journal = j-TALLIP, volume = "23", number = "11", pages = "158:1--158:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3689821", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3689821", abstract = "Chinese Spelling Check (CSC) is a meaningful task in the area of natural language processing, which aims at detecting spelling errors in Chinese texts and then correcting these errors. Current typical CSC models have shown impressive performance in general datasets with the help of pretrained language models such as BERT, but they suffer great performance loss in downstream tasks with domain-specific terms because they are primarily trained on general corpora. To verify the cross-domain adaptation ability of these models, we build three new datasets with abundant domain-specific terms on financial, medical, and legal domains and conduct empirical investigations on them in the corresponding domain-specific test datasets to verify the cross-domain adaptation ability. In response to the poor performance of the existing models, we propose a framework named uChecker, which utilizes an unsupervised method in spelling error detection and correction. Experimental results prove that uChecker can perform well in domain-specific test datasets while not losing its performance in the general domain.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "158", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Shukla:2024:SCA, author = "Shiv Shankar Prasad Shukla and Maheshwari Prasad Singh", title = "Stacked Classification Approach using Optimized Hybrid Deep Learning Model for Early Prediction of Behaviour Changes on Social Media", journal = j-TALLIP, volume = "23", number = "11", pages = "159:1--159:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3689906", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3689906", abstract = "Detecting signs of suicidal thoughts on social media is paramount for preventing suicides, given the platforms' role as primary outlets for emotional expression. Traditional \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "159", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{M:2024:SSM, author = "Rahul Raj M. and Dhanya S. Pankaj", title = "{Social-sum-Mal}: a Dataset for Abstractive Text Summarization in {Malayalam}", journal = j-TALLIP, volume = "23", number = "11", pages = "160:1--160:??", month = nov, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3696107", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Nov 26 06:18:09 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3696107", abstract = "Abstractive text summarization techniques for Malayalam language is still in its infancy. 
The lack of benchmarked datasets for this task is one of the constraints in developing and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "160", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Cao:2024:WSD, author = "Yukun Cao and Chengkun Jin and Yijia Tang and ZiYue Wei", title = "Word Sense Disambiguation Combining Knowledge Graph and Text Hierarchical Structure", journal = j-TALLIP, volume = "23", number = "12", pages = "161:1--161:??", month = dec, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3677524", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Dec 18 09:22:15 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3677524", abstract = "Current supervised word sense disambiguation models have obtained high disambiguation results using annotated information of different word senses and pre-trained language \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "161", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Singh:2024:HHA, author = "Geetanjali Singh and Namita Mittal and Satyendra Singh Chouhan", title = "{HindiSumm}: a {Hindi} Abstractive Summarization Benchmark Dataset", journal = j-TALLIP, volume = "23", number = "12", pages = "162:1--162:??", month = dec, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3696207", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Dec 18 09:22:15 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3696207", abstract = "Abstractive Text Summarization (ATS) is a task to create a novel summary by generating fresh sentences incorporating new words or rephrasing the article. It is a complex task as the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "162", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhao:2024:KGG, author = "Yang Zhao and Xiaomian Kang and Yaping Zhang and Jiajun Zhang and Yu Zhou and Chengqing Zong", title = "Knowledge Graph Guided Neural Machine Translation with Dynamic Reinforce-selected Triples", journal = j-TALLIP, volume = "23", number = "12", pages = "163:1--163:??", month = dec, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3696664", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Dec 18 09:22:15 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3696664", abstract = "Previous methods incorporating knowledge graphs (KGs) into neural machine translation (NMT) adopt a static knowledge utilization strategy, that introduces many useless \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "163", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Huang:2024:EDS, author = "Wen-Chieh Huang and Yu-Ling Hsueh", title = "An Emotional Dialogue System Using Conditional Generative Adversarial Networks with a Sequence-to-Sequence Transformer Encoder", journal = j-TALLIP, volume = "23", number = "12", pages = "164:1--164:??", month = dec, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3698394", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Dec 18 09:22:15 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3698394", abstract = "Understanding the expression of emotion and generating appropriate responses are key steps toward constructing emotional, conversational agents. 
In this article, we propose a \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "164", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chrismanto:2024:EBN, author = "Antonius Rachmat Chrismanto and Edi Winarko and Yohanes Suyanto", title = "{EiAP-BC}: a Novel Emoji Aware Inter-Attention Pair Model for Contextual Spam Comment Detection Based on Posting Text", journal = j-TALLIP, volume = "23", number = "12", pages = "165:1--165:??", month = dec, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3696663", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Dec 18 09:22:15 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3696663", abstract = "Detecting spam comments on social media remains a continuously discussed research topic to this day, especially on public figure/celebrity accounts in Indonesia. However, the \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "165", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2024:ICL, author = "Yijiang Liu and Fei Li and Donghong Ji", title = "Improving Cross-lingual Aspect-based Sentiment Analysis with Sememe Bridge", journal = j-TALLIP, volume = "23", number = "12", pages = "166:1--166:??", month = dec, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3691342", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Dec 18 09:22:15 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3691342", abstract = "Aspect-based Sentiment Analysis (ABSA) comprises numerous subtasks including aspect term extraction (AE), opinion term extraction (OE), opinion pair extraction (PE), and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "166", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kong:2024:RTS, author = "Chunwei Kong and Xueqiang Lv and Le Zhang and Haixing Zhao and Zangtai Cai and Yuzhong Chen", title = "{RPEPL}: {Tibetan} Sentiment Analysis Based on Relative Position Encoding and Prompt Learning", journal = j-TALLIP, volume = "23", number = "12", pages = "167:1--167:??", month = dec, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3698575", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Dec 18 09:22:15 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3698575", abstract = "Sentiment analysis is a critical task for natural language processing. Much research has been done for high-resource languages such as English and Chinese. 
However, Tibetan \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "167", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Prabakaran:2024:EDC, author = "Senthil Prabakaran and Navaneetha Krishnan Muthunambu and Nagarajan Jeyaraman", title = "Empowering Digital Civility with an {NLP} Approach for Detecting {X} (Formerly Known as {Twitter}) Cyberbullying through Boosted Ensembles", journal = j-TALLIP, volume = "23", number = "12", pages = "168:1--168:??", month = dec, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3695251", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Dec 18 09:22:15 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3695251", abstract = "As the number of social networking sites grows, so do cyber dangers. Cyberbullying is harmful behavior that uses technology to intimidate, harass, or harm someone, often on \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "168", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ali:2024:EPB, author = "Mubashir Ali and Anees Baqir and Hafiz Husnain Raza Sherazi and Shehzad Khalid and Phillip Smith and Mark Lee", title = "An Extended Pattern Based Comprehensive Stemmer for the {Urdu} Language", journal = j-TALLIP, volume = "23", number = "12", pages = "169:1--169:??", month = dec, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3701231", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Dec 18 09:22:15 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3701231", abstract = "The Urdu language is used by approximately 200 million people for spoken and written communications on a daily basis. There is a substantial amount of unstructured \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "169", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2024:EET, author = "Guangjun Zhang and Hu Zhang and Ru Li and Hongye Tan", title = "{EADRE}: Event-type Aware Dynamic Representation of Entities in Document-level Event Extraction", journal = j-TALLIP, volume = "23", number = "12", pages = "170:1--170:??", month = dec, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3695767", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Dec 18 09:22:15 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3695767", abstract = "Document-level event extraction aims to identify event types and arguments from one document. 
However, existing methods fail to consider semantic distinctions between multiple \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "170", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumar:2024:CAA, author = "Naresh Kumar and Parveen Kumar and Sushreeta Tripathy and Neelamani Samal and Debasis Gountia and Praveen Gatla and Teekam Singh", title = "Context-Aware Adversarial Graph-Based Learning for Multilingual Grammatical Error Correction", journal = j-TALLIP, volume = "23", number = "12", pages = "171:1--171:??", month = dec, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3696106", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Dec 18 09:22:15 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3696106", abstract = "Correcting grammatical errors in various language contexts is a crucial and challenging task in the field of natural language processing, commonly referred to as Multilingual \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "171", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Obiang:2024:ITR, author = "Saint Germes B. 
Bengono Obiang and Norbert Tsopze and Paulin Melatagia Yonta and Jean-Fran{\c{c}}ois Bonastre and Tania Jim{\'e}nez", title = "Improving Tone Recognition Performance using {Wav2vec 2.0}-Based Learned Representation in {Yoruba}, a Low-Resourced Language", journal = j-TALLIP, volume = "23", number = "12", pages = "172:1--172:??", month = dec, year = "2024", CODEN = "????", DOI = "https://doi.org/10.1145/3690384", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Wed Dec 18 09:22:15 MST 2024", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3690384", abstract = "Many sub-Saharan African languages are categorized as tone languages, and for the most part, they are classified as low-resource languages due to the limited resources \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "172", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Singh:2025:DKM, author = "Telem Joyson Singh and Sanasam Ranbir Singh and Priyankoo Sarmah", title = "Distilling Knowledge in Machine Translation of Agglutinative Languages with Backward and Morphological Decoders", journal = j-TALLIP, volume = "24", number = "1", pages = "1:1--1:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3703455", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Feb 4 06:06:29 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3703455", abstract = "Agglutinative languages often have morphologically complex words (MCWs) composed of multiple morphemes arranged in a hierarchical structure, posing \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "1", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{A:2025:SDL, author = "Madhavaraj A and Bharathi Pilar and Ramakrishnan Angarai Ganesan", title = "Subword Dictionary Learning and Segmentation for Expanding the Vocabulary of Automatic Speech Recognition in {Tamil} and {Kannada}", journal = j-TALLIP, volume = "24", number = "1", pages = "2:1--2:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3705312", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Feb 4 06:06:29 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3705312", abstract = "We present automatic speech recognition (ASR) systems for Tamil and Kannada based on subword modeling to effectively handle unlimited vocabulary arising due to the highly \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "2", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bolucu:2025:SIG, author = "Necva B{\"o}l{\"u}c{\"u} and Burcu Can", title = "Semantically-Informed Graph Neural Networks for Irony Detection in {Turkish}", journal = j-TALLIP, volume = "24", number = "1", pages = "3:1--3:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3705610", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Feb 4 06:06:29 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3705610", abstract = "Social media plays an important role in expressing the thoughts and sentiments of users. 
Irony is a way of stating a sentiment about something by expressing the opposite \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "3", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Dang:2025:UBR, author = "Xiaochao Dang and Guozhen Ding and Xiaohui Dong and Fenfang Li and Shiwei Gao and Yue Wang", title = "{UIE}-Based Relational Extraction Task for Mine Hoist Fault Data", journal = j-TALLIP, volume = "24", number = "1", pages = "4:1--4:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3705313", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Feb 4 06:06:29 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3705313", abstract = "Information extraction is pivotal in natural language processing, where the goal is to convert unstructured text into structured information. A significant challenge in this domain is \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "4", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lin:2025:SYE, author = "Nankai Lin and Meiyu Zeng and Wentao Huang and Shengyi Jiang and Lixian Xiao and Aimin Yang", title = "A Simple Yet Effective Corpus Construction Framework for {Indonesian} Grammatical Error Correction", journal = j-TALLIP, volume = "24", number = "1", pages = "5:1--5:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3704264", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Feb 4 06:06:29 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3704264", abstract = "Currently, the majority of research in grammatical error correction (GEC) is concentrated on universal languages, such as English and Chinese. Many \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "5", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Acar:2025:RCS, author = "Ali Acar and Selma Tekir", title = "Recognition of Counterfactual Statements in {Turkish}", journal = j-TALLIP, volume = "24", number = "1", pages = "6:1--6:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3706105", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Feb 4 06:06:29 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3706105", abstract = "Counterfactual statements are examples of causal reasoning as they describe events that did not happen and, optionally, those events' consequences if they happened. \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "6", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tabassam:2025:UUP, author = "Muhammad Rauf Tabassam and Hajra Waheed and Iqra Safder and Raheem Sarwar and Naif Radi Aljohani and Raheel Nawaz and Saeed-Ul Hassan and Farooq Zaman and Ahtazaz Ahsan", title = "{UPON}: {Urdu} Poetry Generation Using Deep Learning: a Novel Approach and Evaluation", journal = j-TALLIP, volume = "24", number = "1", pages = "7:1--7:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708535", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Feb 4 06:06:29 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3708535", abstract = "Poetry represents the oldest and most esteemed literary form, allowing poets to convey ideas while carefully attending to elements such as meaning, coherence, poetic quality, and \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "7", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2025:VSG, author = "Yuanlong Wang and Qiang Ma and Ru Li and Hu Zhang", title = "Visual Story Generation Model Guided by Multi Granularity Image Information", journal = j-TALLIP, volume = "24", number = "1", pages = "8:1--8:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708886", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Feb 4 06:06:29 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3708886", abstract = "Visual story generation, which involves generating short stories from sequential images, has become a core task at the intersection of computer vision and natural language \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "8", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sun:2025:SFG, author = "Teng Sun and Zhenqiu Shu and Yuxin Huang and Hongbin Wang and Zhengtao Yu", title = "Semantic Feature Graph Consistency with Contrastive Cluster Assignments for Multilingual Document Clustering", journal = j-TALLIP, volume = "24", number = "1", pages = "9:1--9:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708887", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Feb 4 06:06:29 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3708887", abstract = "Multilingual document clustering (MDC) aims to partition multilingual documents into distinct clusters based on topic categories in an unsupervised manner. 
However, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "9", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Al-Zoghby:2025:OMM, author = "Aya M. Al-Zoghby and Esraa Mohamed K. Al-Awadly and Ahmed Ismail Ebada and Wael A. Awad", title = "Overview of Multimodal Machine Learning", journal = j-TALLIP, volume = "24", number = "1", pages = "10:1--10:??", month = jan, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701031", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Feb 4 06:06:29 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3701031", abstract = "Human nature is fundamentally driven by the need for interaction and attention, which are fulfilled through various sensory modalities, including hearing, sight, touch, taste, \ldots{}.", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "10", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhou:2025:GMF, author = "Dong Zhou and Qiang Ouyang and Nankai Lin and Yongmei Zhou and Aimin Yang", title = "{GS2F}: Multimodal Fake News Detection Utilizing Graph Structure and Guided Semantic Fusion", journal = j-TALLIP, volume = "24", number = "2", pages = "11:1--11:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708536", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Feb 21 09:14:43 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3708536", abstract = "The prevalence of fake news online has become a significant societal concern. 
To combat this, multimodal detection techniques based on images and text have shown promise. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "11", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Boutadjine:2025:HVM, author = "Amal Boutadjine and Fouzi Harrag and Khaled Shaalan", title = "Human vs. Machine: a Comparative Study on the Detection of {AI}-Generated Content", journal = j-TALLIP, volume = "24", number = "2", pages = "12:1--12:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708889", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Feb 21 09:14:43 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3708889", abstract = "The surge in advancements in large language models (LLMs) has expedited the generation of synthetic text imitating human writing styles. This, however, raises concerns about \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "12", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Nasr:2025:ERS, author = "Latifa Ibn Nasr and Abir Masmoudi and Lamia Hadrich Belguith", title = "Emotion Recognition from Spontaneous {Tunisian} Dialect Speech", journal = j-TALLIP, volume = "24", number = "2", pages = "13:1--13:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708340", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Feb 21 09:14:43 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3708340", abstract = "Emotional expressions are a fundamental aspect of human communication, with speech being one of the most natural modes of interaction. Speech Emotion Recognition (SER) is a \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "13", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Aljeezani:2025:AAR, author = "Othman Aljeezani and Dorieh Alomari and Irfan Ahmad", title = "{Arabic} App Reviews: Analysis and Classification", journal = j-TALLIP, volume = "24", number = "2", pages = "14:1--14:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708987", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Feb 21 09:14:43 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3708987", abstract = "User opinions and feedback on mobile applications are crucial for application developers, offering insights into issues like bugs, popular features, and enhancement requests. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. 
Inf. Process.", articleno = "14", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sattar:2025:DER, author = "Hafsa Sattar and Mubasher Munir and Muhammad Kamran Malik and Zara Nasar", title = "Detecting Empathy in {Roman Urdu} Using Transfer Learning", journal = j-TALLIP, volume = "24", number = "2", pages = "15:1--15:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711825", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Feb 21 09:14:43 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3711825", abstract = "Empathy means to understand the other person's feelings and emotions to become familiar with their situation. This work aims to create a Roman Urdu empathy dataset containing two \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "15", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Teng:2025:INM, author = "Bingtao Teng and Yuan Chen and Juwei Zhang", title = "Improving Neural Machine Translation in the Field of Electrical Engineering by Using Sentence Backbone Information", journal = j-TALLIP, volume = "24", number = "2", pages = "16:1--16:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3712261", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Feb 21 09:14:43 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3712261", abstract = "Due to the limited availability of corpora in the field of Electrical Engineering and the presence of numerous specialized terms, neural machine translation (NMT) performs poorly in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "16", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Pokhriyal:2025:SSD, author = "Himani Pokhriyal and Goonjan Jain", title = "Supposititious Sarcasm Detection and Sentiment Analysis Coping {Hindi} Language in Social Networks Harnessing {Zipf--Mandelbrot} Probabilistic Optimisation and Perplexity Entropy Learning", journal = j-TALLIP, volume = "24", number = "2", pages = "17:1--17:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3712061", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Feb 21 09:14:43 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3712061", abstract = "Sarcasm is used to convey contempt via poking and ridiculing the other person. 
It is frequently used to make fun of other people by saying unpleasant things. The presence of \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "17", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Taji:2025:PDO, author = "Mobina Taji and Arash Ghafouri and Hasan Naderi and Behrouz Minaei-Bidgoli", title = "{PersianMHQA}: a Dataset for Open Domain {Persian} Multi-hop Question Answering Based on {Wikipedia} Encyclopedia", journal = j-TALLIP, volume = "24", number = "2", pages = "18:1--18:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711826", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Feb 21 09:14:43 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3711826", abstract = "Today, one of the most important tasks in natural language processing is answering user questions. Especially, users' questions nowadays moved from simple questions to \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "18", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tang:2025:TTC, author = "Chao Tang and Zelin Tan and Xiaobing Zhao", title = "{Tibetan} Text Classification based on Prompt Learning and Ensemble Learning", journal = j-TALLIP, volume = "24", number = "2", pages = "19:1--19:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711827", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Feb 21 09:14:43 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3711827", abstract = "With the advancement of pre-trained language models, prompt learning has emerged as a trend for text classification. It offers several advantages over traditional machine \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "19", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bagies:2025:CAD, author = "Taghreed Bagies and Rahaf Alsuhaimi and Miada Almasre and Alaa Bafail", title = "A Comprehensive Analysis Dashboard for Detecting Similar {Saudi} {Twitter} Accounts by Using Stylometric Features", journal = j-TALLIP, volume = "24", number = "2", pages = "20:1--20:??", month = feb, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3705002", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Feb 21 09:14:43 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3705002", abstract = "Criminals, including terrorists, may use Twitter to communicate and share their ideologies. They often employ multiple accounts for anonymity. 
While the other accounts hide their \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "20", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Hong:2025:NMH, author = "Trung Phan Hong and Phuc Do", title = "A Novel Multi-hop Query Answering System Based on a Large Knowledge Graph and Distributed Computing", journal = j-TALLIP, volume = "24", number = "3", pages = "21:1--21:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711824", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 25 09:30:51 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3711824", abstract = "An automated query answering system (QAS) is a very useful application in organizations. Therefore, there is a lot of research to build, develop and improve it. In this article, we present a method to build a multi-hop query answering system (MQAS) based \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "21", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mohan:2025:MAH, author = "Jayanth Mohan and Spandana Reddy Mekapati and Premjith B and Jyothish Lal G and Bharathi Raja Chakravarthi", title = "A Multimodal Approach for Hate and Offensive Content Detection in {Tamil}: From Corpus Creation to Model Development", journal = j-TALLIP, volume = "24", number = "3", pages = "22:1--22:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3712260", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 25 09:30:51 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3712260", abstract = "Detecting hate speech on social media platforms is vital to mitigate technology-facilitated violence. Extensive research has targeted widely spoken languages like English, but there is a notable gap in studying hate speech detection in low-resource \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "22", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Alajmi:2025:AQG, author = "Anwar Alajmi and Haniah Altabaa and Sa'ed Abed and Imtiaz Ahmad", title = "{Arabic} Question Generation Using Transformers", journal = j-TALLIP, volume = "24", number = "3", pages = "23:1--23:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3701559", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 25 09:30:51 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3701559", abstract = "After the increased reliance on online education, online assessment became an essential tool for educators to remotely monitor and evaluate students' understanding in order to assist them properly. However, the laborious process of creating exam questions \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "23", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2025:SAM, author = "Haijie Wang and Jiajia Jiao", title = "Sentiment Analysis of {MOOC} Reviews Based on Knowledge Dependency Tree", journal = j-TALLIP, volume = "24", number = "3", pages = "24:1--24:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3713073", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 25 09:30:51 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3713073", abstract = "As an important online learning resource, Massive Open Online Courses have a large amount of comments, which can be exploited by aspect-level sentiment analysis to optimize MOOC teaching from different perspectives. However, there are two essential \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "24", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sharma:2025:HSD, author = "Deepawali Sharma and Tanusree Nath and Vedika Gupta and Vivek Kumar Singh", title = "Hate Speech Detection Research in {South Asian} Languages: a Survey of Tasks, Datasets and Methods", journal = j-TALLIP, volume = "24", number = "3", pages = "25:1--25:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711710", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 25 09:30:51 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3711710", abstract = "Social media has over the years emerged as a powerful platform for communicating and sharing views, thoughts, and opinions. 
However, at the same time it is being abused by certain individuals to spread hate against individuals, communities, religions, and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "25", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Rai:2025:TSL, author = "Abigail Rai and Samarjeet Borah", title = "Tokenization and Stemming of {Limbu} Language", journal = j-TALLIP, volume = "24", number = "3", pages = "26:1--26:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3712018", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 25 09:30:51 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3712018", abstract = "Significant issues in tokenization and stemming in natural language processing are addressed in this work, focusing primarily on the Limbu language. Two essential preprocessing procedures that work to normalize words by condensing them into compact \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "26", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wenbo:2025:LTC, author = "Zhang, Wenbo and Dang, Hongbo and Bao, Zhenshan and Song, Bingyan", title = "{LAMGCN}: Traditional {Chinese} Medicine Herb Recommendation via {LSTMs} with Attention Mechanisms and Graph Convolutional Networks", journal = j-TALLIP, volume = "24", number = "3", pages = "27:1--27:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3708888", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 25 09:30:51 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3708888", abstract = "Herb recommendation plays a crucial role in the therapeutic process of Traditional Chinese Medicine (TCM), which aims at recommending a set of herbs to treat patients with different symptoms. Previous works used many methods to discover regularities in \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "27", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tian:2025:DPB, author = "Yu Tian and Junhui Li and Suyang Zhu and Guodong Zhou", title = "{DialoguePFM}: Prompt-based Fusion Model for Emotion Recognition in Conversation", journal = j-TALLIP, volume = "24", number = "3", pages = "28:1--28:??", month = mar, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3714410", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Mar 25 09:30:51 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", URL = "https://dl.acm.org/doi/10.1145/3714410", abstract = "Emotion recognition in conversation (ERC) presents a significant challenge in natural language processing. In this study, we propose the Prompt-based Fusion Model (DialoguePFM), which innovatively introduces an emotion representation that conveys the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.",
  articleno = "28",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2025:PPL,
  author = "Shiming Wang and Li-Ping Chen and Yang Ai and Yajun Hu and Zhen-Hua Ling",
  title = "{PhonemeVec}: a Phoneme-Level Contextual Prosody Representation For Speech Synthesis",
  journal = j-TALLIP,
  volume = "24",
  number = "3",
  pages = "29:1--29:??",
  month = mar,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3711828",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Tue Mar 25 09:30:51 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3711828",
  abstract = "Recently, fine-grained prosody representations have emerged and attracted growing attention to address the one-to-many problem in text-to-speech (TTS). In this article, we propose the PhonemeVec, a pre-trained prosody representations with considering the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "29",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Emiru:2025:GGE,
  author = "Eshete Derb Emiru and Desalegn Mamo Wendyifraw",
  title = "{Ge{\'e}z} Grammar Error Handling Using Neural Machine Translation Approach",
  journal = j-TALLIP,
  volume = "24",
  number = "3",
  pages = "30:1--30:??",
  month = mar,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3711829",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Tue Mar 25 09:30:51 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  URL = "https://dl.acm.org/doi/10.1145/3711829",
  abstract = "The goal of natural language processing (NLP), which has recently gained popularity, is to improve the capacity of computers to comprehend and interact with human language. Consequently, to converse using natural language, it is crucial that spoken \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "30",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Chen:2025:CQE,
  author = "Jie Chen and Yinlong Wang and Shu Zhao and Peng Zhou and Yanping Zhang",
  title = "Contextualized Quaternion Embedding Towards Polysemy in Knowledge Graph for Link Prediction",
  journal = j-TALLIP,
  volume = "24",
  number = "4",
  pages = "31:1--31:??",
  month = apr,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3714411",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Thu Apr 17 06:19:42 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "To meet the challenge of incompleteness within Knowledge Graphs, Knowledge Graph Embedding (KGE) has emerged as the fundamental methodology for predicting the missing link (Link Prediction), by mapping entities and relations as low-dimensional vectors in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "31",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Li:2025:SDC,
  author = "Yujie Li and Hongfang Zhou and Yinbo Xin",
  title = "A {DA-STCM} Dual-channel Short Text Classification Method Based on Attention Mechanism",
  journal = j-TALLIP,
  volume = "24",
  number = "4",
  pages = "32:1--32:??",
  month = apr,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3715911",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Thu Apr 17 06:19:42 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Short-text classification is an important and challenging task in natural language processing. Compared with long text, short text suffers from insufficient semantic information.
Based on a lot of theoretical and experimental analysis, a two-channel short- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "32",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

%%% NOTE: the truncated abstract of Li:2025:SDC above ended in a stray
%%% period before the ellipsis ("short-."); the period has been removed.

@Article{Xu:2025:BDA,
  author = "Junjie Xu and Luwei Xiao and Anran Wu and Tianlong Ma and Daoguo Dong and Liang He",
  title = "Bidirectional Directed Acyclic Graph Neural Network for Aspect-level Sentiment Classification",
  journal = j-TALLIP,
  volume = "24",
  number = "4",
  pages = "33:1--33:??",
  month = apr,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3716501",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Thu Apr 17 06:19:42 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "To achieve outstanding aspect-level sentiment analysis (ASC), it is crucial to reduce the distance between aspect terms and opinion words. Recently, advanced methods in ASC used graph neural network (GNN)-based methods to leverage the syntactic dependency \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "33",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Ahmad:2025:SIC,
  author = "Iram Ali Ahmad and Praveen Gatla and Rajesh Kumar Mundotiya",
  title = "Sarcasm Identification and Classification in {Hindi} Newspaper Headlines",
  journal = j-TALLIP,
  volume = "24",
  number = "4",
  pages = "34:1--34:??",
  month = apr,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3714469",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Thu Apr 17 06:19:42 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Sarcasm identification in textual data is the most captivating area of research in the current research trends. It is a challenging task for humans as well as for the computer. In this article, we have tried to identify sarcasm in the Hindi newspaper \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "34",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Choudhury:2025:ESA,
  author = "Pankaj Choudhury and Prithwijit Guha and Sukumar Nandi",
  title = "Exploring Semantic Attributes for Image Caption Synthesis in Low-Resource {Assamese} Language",
  journal = j-TALLIP,
  volume = "24",
  number = "4",
  pages = "35:1--35:??",
  month = apr,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3717612",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Thu Apr 17 06:19:42 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Research on image caption generation has predominantly focused on resource-rich languages like English, leaving resource-poor languages (like Assamese and several others) largely understudied.
In this context, this paper leverages both visual and semantic \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "35",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Alyousf:2025:SDS,
  author = "Mona Alyousf and Mohamad Firas Alhalabi",
  title = "A Survey of Document Stemming Algorithms in Information Retrieval Systems",
  journal = j-TALLIP,
  volume = "24",
  number = "4",
  pages = "36:1--36:??",
  month = apr,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3715120",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Thu Apr 17 06:19:42 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "With the increase in the growth and diversity of databases and the enormity of their contents, there has become an urgent need to find advanced techniques in Natural Language Processing (NLP) applications, especially in the field of Information Retrieval \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "36",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Song:2025:MDS,
  author = "Xinglong Song and Changlin Song and Haolu Yu and Yonghua Zhu and Hong Yao",
  title = "{MixSong}: Diverse and Strictly Formatted {Chinese} Poetry Generation",
  journal = j-TALLIP,
  volume = "24",
  number = "4",
  pages = "37:1--37:??",
  month = apr,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3718331",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Thu Apr 17 06:19:42 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Chinese poetry, renowned for its elegance and simplicity, is a hallmark of Chinese culture.
While neural networks have made significant advancements in generating poetry, balancing diversity with adherence to rigid structural formats remains a challenge. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "37",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2025:NBB,
  author = "Youwei Wang and Lizhou Feng",
  title = "New Bagging Based Ensemble Learning Algorithm Distinguishing Short and Long Texts for Document Classification",
  journal = j-TALLIP,
  volume = "24",
  number = "4",
  pages = "38:1--38:??",
  month = apr,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3718740",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Thu Apr 17 06:19:42 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "To improve the classification accuracy of ensemble learning, a new bootstrap aggregating (Bagging) ensemble learning algorithm distinguishing short and long texts for document classification is proposed. First, the performances of different typical deep \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "38",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Connor:2025:HSR,
  author = "Patrick Charles Connor",
  title = "A Hybrid Statistical and Rule-based Approach to Extremely Low-resource Machine Transliteration",
  journal = j-TALLIP,
  volume = "24",
  number = "4",
  pages = "39:1--39:??",
  month = apr,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3720542",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Thu Apr 17 06:19:42 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Machine transliteration work has focused primarily on languages with large volumes of parallel corpus, and between language pairs whose orthographies are very different. In contrast, a large proportion of the world's languages have vastly fewer resources \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "39",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

%%% NOTE(review): in the next entry the trailing author initials were
%%% recorded as "P.c"; they are normalized here to "P. C." (separate,
%%% capitalized initials).  Confirm against the publisher record.

@Article{K:2025:TTB,
  author = "Reji Rahmath K. and Reghu Raj P. C. and Rafeeque P. C.",
  title = "{TransQAM}: Transformer-based Question Answering System in {Malayalam}",
  journal = j-TALLIP,
  volume = "24",
  number = "4",
  pages = "40:1--40:??",
  month = apr,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3718085",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Thu Apr 17 06:19:42 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Question Answering (QA) systems are used to extract the exact answer from a given context. In this study, we have implemented a QA system named TransQAM with BERT and its variants for the low-resource Malayalam language.
We have considered the transformer \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "40",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Phan:2025:TOT,
  author = "Thi-Hoai Phan and Hoang-Son Bui and Tri Trung Kien Le and Thi-Ngoc-Diep Do and Thuy-Binh Nguyen and Hong-Quan Nguyen and Thanh-Hai Tran and Thanh-Thuy Pham and Thi-Lan Le",
  title = "Towards an Online Text-Based Person Search in {Vietnamese} Language",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "41:1--41:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3721979",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "In recent years, many efforts have been dedicated to text-based person search, thanks to its potential applications in various domains. However, most of these works focus on person search via queries in English and conduct offline evaluations. Despite \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "41",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Zhuang:2025:TQG,
  author = "Yan Zhuang and Yuan Sun and Yijie Li and Sisi Liu and Xiaobing Zhao",
  title = "{Tibetan} Question Generation Based on Key Sentence and Knowledge Graph",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "42:1--42:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3725531",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Question generation aims to generate questions according to the given context and answer, and it has made significant progress in both Chinese and English languages. However, research on Tibetan question generation is still in the early stages, with key \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "42",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Ma:2025:DMT,
  author = "Xinchen Ma and Man Lan and Wenbo Hu and Yue Lu",
  title = "{Dongba} Machine Translation with Transfer Learning: Leveraging Pre-trained Ancient {Chinese} Models",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "43:1--43:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3721980",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "The Dongba script, a logographic writing system used by the Naxi people in religious activities, faces challenges in translation due to the advanced age of Dongba script experts and the time-consuming nature of manual deciphering. This study focuses on \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "43",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Huang:2025:LCS,
  author = "Yibo Huang and Li An and Qiuyu Zhang",
  title = "Low Complexity Speech Secure Hash Retrieval Algorithm Based on {KDTree} Nearest Neighbor Search",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "44:1--44:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3723161",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "With the continuous growth of dimensions in retrieval systems, only a few data points are distributed near the center (empty space phenomenon), and the distance between data points in high-dimensional space is nearly equal (dimensional effect), resulting in high complexity and low accuracy in retrieval. Aiming at the preceding problems, this article designs a speech secure hash retrieval scheme. In this scheme, the spectral subband centroids of speech are extracted to generate the feature vector, then the biometric template index is established by KDTree classification, and the specific SHA256-Ushiki chaotic encryption algorithm key is allocated to each index. The security framework is constructed according to the cancelable biometric template generated by the combination of classification and distribution key, and the binary hash vector is generated, then the hash vector is encrypted. Experimental results show that through the establishment of the KDTree cancelable biometric template index, the super rectangular region of the $K$-dimensional space is constructed, which effectively solves the empty space phenomenon and the dimensional effect.
Through the KDTree nearest neighbor search, the algorithm reduces the number of matches between classes, which effectively reduces computational complexity and accuracy problems. The tampering comparison of mobile terminal realizes the content verifiable retrieval. The speech encryption effectively prevents the leakage of plaintext and ensures security of the speech storage and transmission process.",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "44",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Le:2025:LLS,
  author = "The Anh Le and Hai Son Le",
  title = "{LatVis}: Large-scale Task-specific Language Model for Low-resource {Vietnamese} Multi-document Summarization",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "45:1--45:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3725848",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "The Vietnamese multi-document summarization task faces three key challenges including the long input sequence problem, human-like summary generation, and the scarcity of labeled data. Transformer-based models, enhanced by parallel computation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "45",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Ahmed:2025:UWS,
  author = "Anil Ahmed and Degen Huang and Syed Yasser Arafat and Khawaja Iftekhar Rashid",
  title = "{Urdu} Word Sense Disambiguation: Leveraging Contextual Stacked Embedding, {Siamese} Transformer Encoder {1DCNN-BiLSTM}, and Gloss Data Augmentation",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "46:1--46:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3719293",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Word Sense Disambiguation (WSD) in Natural Language Processing (NLP) is crucial for discerning the correct meaning of words with multiple senses in various contexts. Recent advancements in this field, particularly Deep Learning (DL) and sophisticated \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "46",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Mazumder:2025:ICM,
  author = "Debajyoti Mazumder and Aakash Kumar and Jasabanta Patro",
  title = "Improving Code-Mixed Hate Detection by Native Sample Mixing: a Case Study for {Hindi--English} Code-Mixed Scenario",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "47:1--47:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3726866",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Hate detection has long been a challenging task for the NLP community.
The task becomes complex in a code-mixed environment because the models must understand the context and the hate expressed through language alteration. Compared to the monolingual \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "47",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Mortadi:2025:IIM,
  author = "Ahmad Mortadi and Waleed Nazih and Mohamed I. Eldesouki and Yasser Hifny",
  title = "Intelligent De-Identification of Medical Discharge Summaries Using Hybrid {NLP} Techniques",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "48:1--48:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3724118",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Medical discharge summaries are vital documents in healthcare, often containing Personally Identifiable Information (PII), raising concerns regarding privacy and regulatory compliance. This article proposes a cutting-edge approach that utilizes \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "48",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Li:2025:SAA,
  author = "Wei Li and Yi Li and Yanqiu Shao and Mengxi Bi",
  title = "A Structure-aware Approach Leveraging Semantic Relevance Graph for Annotation Alignment of {Chinese} Classics",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "49:1--49:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3725733",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Throughout the long history of China, ideologists have annotated classical texts, providing a valuable resource for scholarly study. Many of these annotations are presented as entire paragraphs, each sentence of which must be linked to the corresponding \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "49",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Lora:2025:CCL,
  author = "Sanzana Karim Lora and M.
Sohel Rahman and Rifat Shahriyar",
  title = "{ConVerSum}: a Contrastive Learning-Based Approach for Data-Scarce Solution of Cross-Lingual Summarization Beyond Direct Equivalents",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "50:1--50:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3722109",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Cross-lingual summarization (CLS) is a sophisticated branch in Natural Language Processing that demands models to accurately translate and summarize articles from different source languages. Despite the improvement of the subsequent studies, this area \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "50",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Ranathunga:2025:SST,
  author = "Surangika Ranathunga and Rumesh Sirithunga and Himashi Rathnayake and Lahiru de Silva and Thamindu Aluthwala and Saman Peramuna and Ravi Shekhar",
  title = "{SiTSE}: {Sinhala} Text Simplification Dataset and Evaluation",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "51:1--51:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3723160",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Text Simplification is a task that has been minimally explored for low-resource languages. Consequently, there are only a few manually curated datasets. In this article, we present a human-curated sentence-level text simplification dataset for the Sinhala \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang.
Inf. Process.",
  articleno = "51",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2025:LAT,
  author = "Ya Wang and Cungen Cao and Xin Huang and Ming Hui and Wei Zheng",
  title = "A Linguistic Approach Towards Automatic Acquisition and Classification of Common {Chinese} Affective Events",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "52:1--52:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3726526",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Affective events are events that are typically associated with a positive or negative emotional state. For example, get food is a desirable event, while suffer from asthma is an undesirable event. Identifying affective events and their polarities is \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "52",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2025:FCI,
  author = "Xiaowen Liu and Xiaoli Zhao and Jiale Chen and Zehui Li",
  title = "{FedINER}: {Chinese} Industrial Named Entity Recognition with Federated Learning",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "53:1--53:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3730401",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Named entity recognition (NER) is foundational in constructing industrial knowledge graphs and is an essential component in the automation of knowledge within the industry.
However, due to the particularities of the industrial field, its data involves \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "53",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Sulistyo:2025:PLR,
  author = "Danang Arbian Sulistyo and Didik Dwi Prasetya and Fadhli Almu'iini Ahda and Aji Prasetya Wibawa",
  title = "Pivoted Low Resource Multilingual Translation with {NER} Optimization",
  journal = j-TALLIP,
  volume = "24",
  number = "5",
  pages = "54:1--54:??",
  month = may,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3727876",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Wed May 21 06:16:17 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Machine translation (MT) has advanced significantly with neural machine translation (NMT) models like BERT, GPT, and MarianMT, which leverage deep learning to provide more accurate and natural translations. However, low-resource languages such as Javanese \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "54",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Hou:2025:ABS,
  author = "Linlin Hou and Wenhui Tu and Ting Yu and Ting Jiang and Mohamed Bah and Zenghui Xu and Yu Zhang and Gaoming Yang and Ji Zhang",
  title = "Aspect-based Sentiment Analysis for {COVID-19}: a Heterogeneous Graph Convolutional Network Approach",
  journal = j-TALLIP,
  volume = "24",
  number = "6",
  pages = "55:1--55:26",
  month = jun,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3731758",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Tue Sep 23 08:50:56 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "The epidemic of infectious diseases has a significant impact on society, the economy, and people's lives. Social media, with its high user participation and rapid information \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "55",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2025:DSL,
  author = "Haijiang Liu and Chen Qiu and Qiyuan Li and Maofu Liu and Li Wang and Jinguang Gu",
  title = "Deep Stable Learning for Cross-lingual Dependency Parsing",
  journal = j-TALLIP,
  volume = "24",
  number = "6",
  pages = "56:1--56:34",
  month = jun,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3735509",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Tue Sep 23 08:50:56 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "The Cross-lingual Dependency Parsing (XDP) task poses a significant challenge due to the differences in dependency structures between training and testing languages, known \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "56",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Le:2025:OAG,
  author = "Huong Le and Ngoc Luu and Thanh Nguyen and Tuan Dao and Sang Dinh",
  title = "Optimizing Answer Generator in {Vietnamese} Legal Question Answering Systems Using Language Models",
  journal = j-TALLIP,
  volume = "24",
  number = "6",
  pages = "57:1--57:17",
  month = jun,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3732938",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Tue Sep 23 08:50:56 MDT 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "The development of large language models (LLMs) such as ChatGPT and Gemini has led to impressive advancements in question answering (QA) systems. However, they often \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "57", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Boughareb:2025:ACD, author = "Djalila Boughareb and Rima Boughareb and Samir Hallaci and Mohammed Raid Elislam Boukherouba and Hamid Seridi", title = "Addressing the Complexity of Dialectal {Arabic}: an Enhanced Encoder-Decoder Ensemble Approach for Optimized Sentiment Analysis", journal = j-TALLIP, volume = "24", number = "6", pages = "58:1--58:21", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3735972", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:56 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Sentiment analysis has become an essential tool in understanding global narratives across social, economic, political, and commercial sectors. As social media platforms increasingly \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "58", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Dai:2025:TTT, author = "Wei Dai and Dongfang Han and Turdi Tohti and Yi Liang and Zicheng Zuo and Yuanyuan Liao and Qingwen Yang", title = "{TFT-TL}: Token-Level Filter Training Transfer Learning for Low-Resource Neural Machine Translation", journal = j-TALLIP, volume = "24", number = "6", pages = "59:1--59:19", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3732779", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:56 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Transfer learning plays a crucial role in low-resource machine translation by addressing the challenge of poor model performance due to limited data in low-resource languages, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "59", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Sethiya:2025:ISL, author = "Nivedita Sethiya and Saanvi Nair and Puneet Walia and Chandresh Maurya", title = "{Indic-ST}: a Large-Scale Multilingual Corpus for Low-Resource Speech-to-Text Translation", journal = j-TALLIP, volume = "24", number = "6", pages = "60:1--60:25", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3736720", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:56 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "We introduce Indic-ST, a novel dataset for speech-to-text translation (ST) task from English to Indic languages to bridge the performance gap. ST involves \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Asian Low-Resour. Lang. Inf. Process.", articleno = "60", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2025:OEA, author = "Lixin Liu and Aiwen Jiang and Yinuo Chen and Changhong Liu and Qi Huang and Mingwen Wang", title = "Open-ended Autoregressive Visual Storytelling via Parameter Efficient Instruction Tuning", journal = j-TALLIP, volume = "24", number = "6", pages = "61:1--61:19", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3736759", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:56 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Visual storytelling (VIST) involves generating coherent, creative, and vivid narrative for a collection of images. It remains an immense challenge within cross-modal domain. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "61", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Albilali:2025:AAK, author = "Eman Albilali and Nora Al-Twairesh and Manar Hosny", title = "{AKER}: {Arabic} Knowledge-enriched Reader for Machine Reading Comprehension", journal = j-TALLIP, volume = "24", number = "6", pages = "62:1--62:29", month = jun, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3736164", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:56 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Machine reading comprehension aims at understanding a passage and answer a given question by selecting a span from the passage. Recently, pre-trained language models \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. 
Inf. Process.", articleno = "62", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Minoofam:2025:DQC, author = "Seyyed Amir Hadi Minoofam and Azam Bastanfard and Mohammad Reza Keyvanpour", title = "{DB-QM}: a Comparative Quality Measurement and Its Prospective on {Persian\slash Arabic} Databases for {OCR}", journal = j-TALLIP, volume = "24", number = "7", pages = "63:1--63:25", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3711714", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:54 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In Optical Character Recognition (OCR), state-of-the-art algorithms are applied to the same databases to compare performance and cost. Various benchmark databases have been created recently in Persian script to facilitate OCR application development. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "63", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mehamed:2025:HAA, author = "Moges Ahmed Mehamed and Shengwu Xiong and Awet Fesseha Aberha", title = "Hybrid Approach for Automatic Text Summarization for Low-resourced {Amharic} Language", journal = j-TALLIP, volume = "24", number = "7", pages = "64:1--64:14", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3743677", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:54 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Automatic text summarization creates a concise version of the given document while retaining the original content's core ideas, logical structure, and understandability. 
Despite extensive research on summarization in English and other languages, there \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "64", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Abdulhamed:2025:ENN, author = "Ahmed Abdulhamed and Prabhat Ranjan and Shengwu Xiong", title = "Entity Naming in {NLP}: Hybrid Approach {GPT} Transformer and Multi-level {RNN}", journal = j-TALLIP, volume = "24", number = "7", pages = "65:1--65:26", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744903", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:54 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This research addresses the limitations of existing entity-naming algorithms in natural language processing when the algorithm faces the complexities of polysemy and intricate sentence structures. We propose a novel Transformer-multi-level fusion \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "65", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Huang:2025:ECV, author = "Yuxin Huang and Yuanlin Yang and Zhengtao Yu and Yantuan Xian and Yan Xiang", title = "Enhanced {Chinese--Vietnamese} Cross-Language Event Detection via Aligned Knowledge Event Graph", journal = j-TALLIP, volume = "24", number = "7", pages = "66:1--66:19", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3736410", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:54 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Chinese-Vietnamese cross-language event detection aims to cluster texts that describe the same events in Chinese and Vietnamese into corresponding event clusters. However, because Vietnamese is a low-resource language, directly using multilingual pre-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "66", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Aldawsari:2025:AAE, author = "Mohammed Aldawsari and Omer Dawood", title = "{AraEventCoref}: an {Arabic} Event Coreference Dataset and {LLM} Benchmarks", journal = j-TALLIP, volume = "24", number = "7", pages = "67:1--67:12", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3743047", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:54 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Event coreference resolution is a critical task in Natural Language Processing (NLP), enabling applications such as information extraction, text summarization, and question answering.
However, resolving event coreference in Arabic presents unique \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "67", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Maitra:2025:DBS, author = "Arkadip Maitra and Shree Mitra and Siladittya Manna and Saumik Bhattacharya and Umapada Pal", title = "Decorrelation-Based Self-Supervised Visual Representation Learning for Writer Identification", journal = j-TALLIP, volume = "24", number = "7", pages = "68:1--68:17", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3746062", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:54 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Self-supervised learning has developed rapidly over the last decade and has been applied in many areas of computer vision. Decorrelation-based self-supervised pretraining has shown great promise among non-contrastive algorithms, yielding performance at \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "68", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Trinh:2025:VKG, author = "Tam Trinh and Anh Dao and Thi Hong Nhung Hy and Truong Son Hy", title = "{VietMedKG}: Knowledge Graph and Benchmark for Traditional {Vietnamese} Medicine", journal = j-TALLIP, volume = "24", number = "7", pages = "69:1--69:17", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744740", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:54 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Traditional Vietnamese Medicine (TVM) and Traditional Chinese Medicine (TCM) have shared significant similarities due to their geographical location, cultural exchanges, and hot and humid climatic conditions. However, unlike TCM, which has substantial \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "69", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2025:IUD, author = "Yuanying Wang and Fuyong Xu and Yingzheng Zhu and Guangjin Wang and Peiyu Liu and Ran Lu", title = "{IMDP}: a Unify Dialogue Framework with Awareness and Understanding for Implicit Personalized Dialogue Generation", journal = j-TALLIP, volume = "24", number = "7", pages = "70:1--70:21", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3674733", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:54 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Personalized chatbots concentrate on learning human personalities, making them act similar to real users. 
When it is authorized to respond to other people's messages, it has the same way of speaking as the user. Many personalized methods have been \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "70", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Yung:2025:EEP, author = "Yiu Cheong Yung and Ying-Jia Lin and Hung-Yu Kao", title = "Exploring the Effectiveness of Pre-training Language Models with Incorporation of Diglossia for {Hong Kong} Content", journal = j-TALLIP, volume = "24", number = "7", pages = "71:1--71:16", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744341", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:54 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In this article, we present our works to create the first Hong Kong content-based public pre-training dataset and the experiments which resulted in the creation of ELECTRA-based models for commonly used languages in Hong Kong. The creation of pre-training \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "71", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bawitlung:2025:MAS, author = "Andrew Bawitlung and Sandeep Kumar Dash and Radha Mohan Pattanayak", title = "{Mizo} Automatic Speech Recognition: Leveraging {Wav2vec 2.0} and {XLS-R} for Enhanced Accuracy in Low-Resource Language Processing", journal = j-TALLIP, volume = "24", number = "7", pages = "72:1--72:15", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3746063", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:54 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This study introduces a Mizo Automatic Speech Recognition (ASR) approach by fine-tuning Wav2vec 2.0 and XLS-R models. The research presents the newly developed Mizo speech dataset, MiZonal v1.0 which significantly contributes to the advancement of low-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.", articleno = "72", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Hu:2025:TAE, author = "Weilin Hu and Wei Lin and Qizheng Li and Xiaona Yu and Chengyan Zhu", title = "Textile {AI}-Enhanced Translation System Based on Mapping Probability and In-context Learning", journal = j-TALLIP, volume = "24", number = "7", pages = "73:1--73:17", month = jul, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3746227", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:54 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Large language models (LLMs), such as ChatGPT, offer powerful customized and personalized translation services, and have been increasingly integrated into various specialized fields. However, when translating low-frequency terms in professional domains, \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "73", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Taylor:2025:MSA, author = "Serena Taylor and Fariza Fauzi", title = "Multimodal Sentiment Analysis for the {Malay} Language: New Corpus using {CNN}-based Framework", journal = j-TALLIP, volume = "24", number = "8", pages = "74:1--74:26", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3703445", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Sentiment analysis in the Malay language has traditionally focused on text-based data. Malay is the native language of Malaysia and other surrounding countries. 
While \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "74", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zuhra:2025:TDN, author = "Fatima Tuz Zuhra and Khalid Saleem", title = "Towards Development of New Language Resource for {Urdu}: The Large Vocabulary Word Embeddings", journal = j-TALLIP, volume = "24", number = "8", pages = "75:1--75:14", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748308", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Urdu is a resource-poor language as it lacks natural language processing (NLP) resources. NLP resources include word embeddings, treebanks, parsers, part-of-speech \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "75", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2025:ANI, author = "Xiangqi Wang and Yehao Kong and Luyuan Xie and Shengfang Zhai and Tairui Wang and Boyan Chen and Junkai Liang and Xin Zhang", title = "{AdvAudio}: a New Information Hiding Method via Fooling Automatic Speech Recognition Model", journal = j-TALLIP, volume = "24", number = "8", pages = "76:1--76:26", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748309", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Audio is an important medium in people's daily life, secret information can be embedded into audio for covert communication. 
However, traditional audio information hiding \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "76", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{B:2025:KRB, author = "K. B. Rashmi and H. S. Guruprasad and B. R. Shambhavi", title = "{KaEnLandetector}: Rule-Based Language Annotation and Transformer-Based Language Detection for {Kannada--English} Code-Mixed Text", journal = j-TALLIP, volume = "24", number = "8", pages = "77:1--77:19", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748310", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In multilingual societies, people tend to mix multiple languages for communication. This phenomenon is known as code-mixing or code-switching. This is visible more on social \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "77", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zha:2025:PER, author = "Yongfu Zha and Xuanxuan Che and Lina Sun and Yumin Dong", title = "Personalized Explainable Recommendations for Self-Attention Collaboration", journal = j-TALLIP, volume = "24", number = "8", pages = "78:1--78:13", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3657636", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In recommender systems, providing reasonable explanations can enhance users' comprehension of recommended results.
Template-based explainable recommendation heavily \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "78", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Gain:2025:IVC, author = "Baban Gain and Dibyanayan Bandyopadhyay and Samrat Mukherjee and Chandranath Adak and Asif Ekbal", title = "Impact of Visual Context on Noisy Multimodal {NMT}: an Empirical Study for {English} to {Indian} Languages", journal = j-TALLIP, volume = "24", number = "8", pages = "79:1--79:27", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748311", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Neural Machine Translation (NMT) has made remarkable progress using large-scale textual data, but the potential of incorporating multimodal inputs, especially visual \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "79", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mahapatra:2025:MNB, author = "Sayan Mahapatra and Debtanu Datta and Shubham Soni and Adrijit Goswami and Saptarshi Ghosh", title = "{MILPaC}: a Novel Benchmark for Evaluating Translation of Legal Text to {Indian} Languages", journal = j-TALLIP, volume = "24", number = "8", pages = "80:1--80:30", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748313", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Most legal text in the Indian judiciary is written in complex English due to historical reasons. However, only a small fraction of the Indian population is comfortable in reading \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "80", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{FathAlalim:2025:ATB, author = "Ahmed Fat'hAlalim and Yongjian Liu and Qing Xie and Nahla Ibrahim", title = "Advancements in Transformer-Based Models for Enhanced Hate Speech Detection in {Arabic}: Addressing Dialectal Variations and Cross-Platform Challenges", journal = j-TALLIP, volume = "24", number = "8", pages = "81:1--81:29", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748492", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The rise of social media platforms has greatly amplified the spread of hate speech, which poses serious societal risks. 
The automated detection of hate speech on social media, especially \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "81", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lata:2025:SSM, author = "Kusum Lata and Pardeep Singh and Kamlesh Dutta", title = "{SMDDH}: Singleton Mention Detection using Deep Learning in {Hindi} Text", journal = j-TALLIP, volume = "24", number = "8", pages = "82:1--82:24", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3700821", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Mention detection is an important component of the Coreference Resolution (CR) system, where mentions such as name, nominal, and pronominals are identified. These \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "82", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Chen:2025:DSL, author = "Huiyao Chen and Xin Zhang and Jing Chen and Meishan Zhang and Min Zhang", title = "Dependency Scoring Learning and Corpus Boosting for Translation-Based Cross-Lingual Dependency Parsing", journal = j-TALLIP, volume = "24", number = "8", pages = "83:1--83:18", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748315", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Dependency parsing is a fundamental task in natural language processing that involves identifying the grammatical relationships between words in a sentence. One \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "83", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Dahou:2025:SDA, author = "Abdelghani Dahou and Abdelhalim Hafedh Dahou and Mohamed Amine Cheragui and Amin Abdedaiem and Mohammed A. A. Al-Qaness and Mohamed Abd Elaziz and Ahmed A. 
Ewees and Zhonglong Zheng", title = "A Survey on Dialect {Arabic} Processing and Analysis: Recent Advances and Future Trends", journal = j-TALLIP, volume = "24", number = "8", pages = "84:1--84:45", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3747290", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Advances in language models have enabled significant strides in developing language technologies tailored for analyzing and processing Dialectical Arabic (DA), \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "84", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mi:2025:LIS, author = "Chenggang Mi and Shaoliang Xie and Yu Li and Zhenghan He", title = "Loanword Identification in Social Media Texts with Extended Code-Switching Datasets", journal = j-TALLIP, volume = "24", number = "8", pages = "85:1--85:19", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748317", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "As a new data augmentation method for cross-lingual natural language processing task, loanword identification has attracted more and more attentions in recent years. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "85", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mishra:2025:HMC, author = "Santosh Kumar Mishra and Soham Chakraborty and Sriparna Saha and Pushpak Bhattacharyya", title = "{Hi-MetaCap}: Configuring Object Relational Transformer in Meta-Learning Environment for Image Captioning in {Hindi}", journal = j-TALLIP, volume = "24", number = "8", pages = "86:1--86:15", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3749642", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "This article proposes a meta-learning-based, few-shot image captioning framework based on an ensemble of object-relational transformer models and a self-distillation strategy. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "86", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mohammadalizadeh:2025:NBP, author = "Parman Mohammadalizadeh and Leila Safari", title = "A Novel Benchmark for {Persian} Table-to-Text Generation: a New Dataset and Baseline Experiments", journal = j-TALLIP, volume = "24", number = "8", pages = "87:1--87:17", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748648", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The ability to comprehend and articulate structured data tables into natural language presents a pivotal yet challenging endeavor for automated systems. While substantial \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. 
Asian Low-Resour. Lang. Inf. Process.", articleno = "87", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bibi:2025:BBU, author = "Sarfraz Bibi and Sohail Asghar and Muhammad Zubair", title = "Breaking Barriers in {URDU WSD}: The Transfer Learning Enriched {MAKS} Framework", journal = j-TALLIP, volume = "24", number = "8", pages = "88:1--88:12", month = aug, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748319", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:57 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Word Sense Disambiguation (WSD) poses a significant challenge in Natural Language Processing (NLP), particularly for languages with complex morphology and semantics like \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "88", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Gang:2025:CFT, author = "Gang Liu and Wenli Yang and Tongli Wang and Zhihao He", title = "Corpus Fusion and Text Summarization Extraction for Multi-Feature Enhanced Entity Alignment", journal = j-TALLIP, volume = "24", number = "9", pages = "89:1--89:15", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744558", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Cross-lingual entity alignment endeavors to identify semantically similar entities within a knowledge graph, facilitating knowledge complementarity and enriching cross-lingual knowledge.
In the context of knowledge-driven tasks such as cross-lingual \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "89", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kodali:2025:HJP, author = "Prashant Kodali and Anmol Goel and Likhith Asapu and Vamshi Krishna Bonagiri and Anirudh Govil and Monojit Choudhury and Ponnurangam Kumaraguru and Manish Shrivastava", title = "From Human Judgements to Predictive Models: Unravelling Acceptability in Code-Mixed Sentences", journal = j-TALLIP, volume = "24", number = "9", pages = "90:1--90:31", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748312", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Current computational approaches for analysing or generating code-mixed sentences do not explicitly model ``naturalness'' or ``acceptability'' of code-mixed sentences, but rely on training corpora to reflect distribution of acceptable code-mixed sentences. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "90", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mughal:2025:FRD, author = "Nimra Mughal and Ghulam Mujtaba and Muhammad Hussain Mughal and Abdul Manaf and Zainab Kamangar", title = "Fake Reviews Detection on E-Commerce {Websites} Using Novel User Behavioral Features: an Experimental Study", journal = j-TALLIP, volume = "24", number = "9", pages = "91:1--91:44", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748493", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The trend of writing fake reviews has recently increased with the rapid growth of e-commerce websites. Fake reviews are usually written to promote or demote the targeted products to affect the customer's decision and thus achieve a competitive advantage. \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "91", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhang:2025:ICI, author = "Yun Zhang and Yongguo Liu and Jiajing Zhu and Zhi Chen and Shuangqing Zhai and Xindong Wu", title = "Inner-character and Inner-word Features Based Representation Learning for {Chinese} Word Embedding", journal = j-TALLIP, volume = "24", number = "9", pages = "92:1--92:33", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748316", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Chinese word embedding is a significant task in natural language processing (NLP). 
Most researchers explored Chinese word embedding according to radical, component, stroke $n$-gram and character features. Besides these features, Chinese characters still \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "92", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Feng:2025:AHP, author = "Changkai Feng and Tong Xu and Shiwei Wu and Derong Xu and Enhong Chen", title = "Adaptive Hierarchical Prompt for Open-Vocabulary Scene Graph Generation", journal = j-TALLIP, volume = "24", number = "9", pages = "93:1--93:20", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748318", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Scene graph has long been treated as the basic tool to summarize the visual semantics from a structural perspective, which always confronts the challenge of capturing open entity and relation classes. Recently, some efforts have been made to enhance the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "93", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kumar:2025:TSC, author = "Senthil {Kumar V.} and Vinoth {Kumar B.}", title = "Text Sentiment Classification Using Knowledge Transfer-Based Matrix Diffractive {Riemannian} Residual Triangulation Topology Aggregation Model", journal = j-TALLIP, volume = "24", number = "9", pages = "94:1--94:28", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3749842", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Text sentiment classification is the process of investigating and categorizing text data based on the sentiment expressed within it, typically into groups such as positive, negative, or neutral. Existing techniques for text sentiment classification often \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "94", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mai:2025:PSB, author = "Cheng-Cheng Mai and Yu Chen and Ziyu Gong and Hanxiang Wang and Mengchuan Qiu and Chunfeng Yuan and Yihua Huang", title = "{PromptCNER}: a Segmentation-based Method for Few-shot {Chinese} {NER} with Prompt-tuning", journal = j-TALLIP, volume = "24", number = "9", pages = "95:1--95:24", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3705314", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Recognizing Chinese entities in low-resource settings is a challenging but promising task, which extracts structured pre-defined entities and corresponding types from unstructured text. Compared with the prosperous Named Entity Recognition (NER) methods \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "95", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Iqbal:2025:USP, author = "Hafiz Rizwan Iqbal and Muhammad Sharjeel and Jawad Shafi and Usama Mehmood and Agha Ali Raza", title = "{Urdu} Sentential Paraphrased Plagiarism Detection Using Large Language Models", journal = j-TALLIP, volume = "24", number = "9", pages = "96:1--96:20", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748320", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Plagiarism, the unauthorized reuse of text, fueled by the ease of access to online content, is a pressing concern for academia, publishers, and authors. Paraphrasing, a common tactic in textual plagiarism, compounds the problem further. The automatic \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "96", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wang:2025:UPB, author = "Yu Wang and Jinmao Xu and Miao Wang and Tianrui Li and Xuanren Qu", title = "A Unified Prompt-based Framework with Label Semantic Expansion for Joint Multi-intent Detection and Slot Filling", journal = j-TALLIP, volume = "24", number = "9", pages = "97:1--97:14", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3749372", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Natural Language Understanding (NLU), which includes both intent detection (ID) and slot filling (SF), is essential for extracting crucial textual information, significantly influencing subsequent tasks and applications. Traditionally, ID and SF were \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "97", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Checker:2025:HHE, author = "Saksham Checker and Madhuri Yadav and Rahul Katarya", title = "{HiEnWrite}: a {Hindi--English} Bilingual Dataset for {Big Five} Personality Detection", journal = j-TALLIP, volume = "24", number = "9", pages = "98:1--98:19", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3756010", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Detecting human personality traits is a critical task across various domains, including healthcare, education, and psychology. 
Recent advancements in artificial intelligence have greatly enhanced the automatic detection of personality traits using writing \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "98", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Qingdaoerji:2025:MES, author = "Ren Qingdaoerji and Yang Yang and Wang Lele", title = "{Mongolian} Emotional Speech Synthesis Based on {CGAN} and Improved {FastSpeech2}", journal = j-TALLIP, volume = "24", number = "9", pages = "99:1--99:16", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3749102", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Mongolian speech synthesis is a technology that converts Mongolian text into Mongolian speech. In order to improve the emotional expressiveness of synthesized speech, this article first proposed a lightweight Mongolian phoneme pre-training model WFST-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "99", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Singh:2025:HCA, author = "Sanskar Singh and Shivam Kushwaha and Avantika Singh and Shaifu Gupta", title = "{Hinglish} Cross-Accent Model Agnostic Meta-Learning Automatic Speech Recognition", journal = j-TALLIP, volume = "24", number = "9", pages = "100:1--100:18", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748322", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Mother tongues and regional dialects have a substantial impact on pronunciation, leading to a range of complex and unique accents. This complexity increases in a diverse country such as India, which has code-mixed languages, which necessitates the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "100", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Al-Thubaity:2025:NDA, author = "Abdulmohsen Al-Thubaity", title = "A Novel Dataset for {Arabic} Domain Specific Term Extraction and Comparative Evaluation of {BERT}-Based Models for {Arabic} Term Extraction", journal = j-TALLIP, volume = "24", number = "9", pages = "101:1--101:12", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748323", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Automatic term extraction from domain-specific corpora is a well-known challenge in natural language processing, with applications in machine translation, information retrieval, text classification, ontology building, and thesaurus construction. Unlike \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "101", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Fashwan:2025:CLA, author = "Amany Fashwan and Sameh Alansary", title = "Computational Linguistic Approach to Orthographic Representation of {Egyptian} {Arabic}: Challenges and Implications", journal = j-TALLIP, volume = "24", number = "9", pages = "102:1--102:14", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748324", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In the past, Arabic Dialects (AD) have been poorly documented linguistically due to the lack of written forms and orthographies. 
However, in recent years, AD have become more widely used as a means of communication since social media and the everywhere \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "102", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Tulajiang:2025:BLN, author = "Paerhati Tulajiang and Yuanyuan Sun and Yuanyu Zhang and Yingying Le and Kelaiti Xiao and Hongfei Lin", title = "A Bilingual Legal {NER} Dataset and Semantics-Aware Cross-Lingual Label Transfer Method for Low-Resource Languages", journal = j-TALLIP, volume = "24", number = "9", pages = "103:1--103:21", month = sep, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748325", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Tue Sep 23 08:50:55 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Named Entity Recognition (NER) in specialized domains for low-resource languages remains a significant challenge due to data scarcity and the complexity of domain-specific terminology. Existing cross-lingual approaches---spanning model-transfer and data-\ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "103", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Bouchal:2025:TAR, author = "Hakim Bouchal and Ahror Belaid and Farid Meziane", title = "Towards Accurate Recognition of Historical {Arabic} Manuscripts: a Novel Dataset and a Generalizable Pipeline", journal = j-TALLIP, volume = "24", number = "10", pages = "104:1--104:30", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3744243", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "In today's digital world, we are committed to digitizing thousands of handwritten transcriptions to preserve their content. Historical Arabic Handwritten Text Recognition (HAHTR) remains a challenge for computer vision systems, due to the many \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "104", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Babayigit:2025:BBM, author = "Bilal Babayigit and Hamza Sattuf and Mohammed Abubaker", title = "{BERT}-based Models for Keyword Extraction from {Arabic} Scientific Articles", journal = j-TALLIP, volume = "24", number = "10", pages = "105:1--105:25", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3761805", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Keywords at the beginning of research articles are crucial for conveying the content and main ideas of academic works. They serve as essential tools for researchers to efficiently search for relevant topics. 
The integration of traditional natural language \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "105", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Shukla:2025:CMR, author = "Shubham Shukla and Sushama Nagpal and Sangeeta Sabharwal", title = "Code-Mixed Romanized {Hindi} Hate Speech Identification: Leveraging {BERT} Embeddings and Particle Swarm Optimization", journal = j-TALLIP, volume = "24", number = "10", pages = "106:1--106:24", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748326", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The volume of hate speeches and the number of user-generated materials are steadily rising, notably on social media networks. This trend can be seen across the internet. Therefore, it is necessary to recognize this kind of offensive content and remove it \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "106", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Jose:2025:OMH, author = "Bineesh Jose and K. P. 
Pushpalatha", title = "Optimized {Malayalam} Handwritten Character Recognition Model Using a Novel {DSC} and Stacked Bi-{LSTM} with Data Augmentation", journal = j-TALLIP, volume = "24", number = "10", pages = "107:1--107:30", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3756011", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Handwritten character recognition (HCR) is a field of computer science and artificial intelligence that involves the recognition of handwritten characters and symbols, typically from digital images or scans. Malayalam has many unique letters with complex \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "107", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhou:2025:MHT, author = "Yifei Zhou and Yves Lepage", title = "Mixup Helps Translation, But Do the Coefficients and the Selection Strategy Influence Translation Quality?", journal = j-TALLIP, volume = "24", number = "10", pages = "108:1--108:20", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3750043", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Mixup, an interpolation-based method that implicitly generates synthetic examples for training, has shown effectiveness in tasks such as image and text classification. Standard mixup randomly interpolates two samples of images and their labels. In this \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "108", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Wu:2025:EPM, author = "Ning-Qian Wu and Zhen-Hua Ling", title = "Enhanced Prosody Modeling and Character Voice Controlling for Audiobook Speech Synthesis", journal = j-TALLIP, volume = "24", number = "10", pages = "109:1--109:24", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3749644", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Conventional speech synthesis techniques have made significant strides towards achieving human-like performance. However, the domain of audiobook speech synthesis still presents notable challenges. On one hand, the speech in audiobooks exhibits rich \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "109", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Liu:2025:DDM, author = "Cencen Liu and Wen Yin and Yi Xu and Qiugang Zhan and Dongyang Zhang and Raza Ahmad", title = "{DDM4TST}: Diffusion Model for Fine-grained Text Style Transfer by Disentangled Representation", journal = j-TALLIP, volume = "24", number = "10", pages = "110:1--110:17", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3749195", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Fine-grained Text Style Transfer (FTST) aims to make targeted and precise modifications to specific stylistic components of a sentence. 
Existing methods typically attempt to disentangle style and content representations for FTST. However, style and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "110", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Faheem:2025:ASU, author = "Ali Faheem and Faizad Ullah and Muhammad Sohaib Ayub and Asim Karim", title = "Abstractive Summarization for {Urdu} Video Description Generation", journal = j-TALLIP, volume = "24", number = "10", pages = "111:1--111:21", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3762992", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Automatic summarization condenses content while retaining key ideas and details. Urdu, with over 230 million speakers globally, is one of the most widely spoken languages. The rise of Urdu content on social media platforms has driven the need for tools \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "111", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Kboubi:2025:FGA, author = "Ferihane Kboubi and Anja Habacha Chaibi", title = "Fine-Grained {Arabic} Dialect Identification: Investigating Various Approaches Across Multiple Datasets", journal = j-TALLIP, volume = "24", number = "10", pages = "112:1--112:33", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3758093", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Arabic, as a language, encompasses numerous dialectical variations. Despite sharing a common vocabulary, these variants exhibit significant differences across territories, even within the same country. The Arabic dialects identification (ADI) is important \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "112", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Aloraini:2025:SCZ, author = "Abdulrahman Aloraini and Juntao Yu and Wateen Aliady and Massimo Poesio", title = "A Survey of Coreference and Zeros Resolution for {Arabic}", journal = j-TALLIP, volume = "24", number = "10", pages = "113:1--113:24", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3702323", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Coreference resolution is the task of resolving mentions that refer to the same entity into clusters. The area and its tasks are crucial in natural language processing applications. 
Extensive surveys of this task have been conducted for English and \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "113", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Zhao:2025:CAK, author = "Shuo Zhao and Jianyong Duan and Li He and Hao Wang and Qing Zhang and Jie Liu", title = "Continuous Adaptive Knowledge Distillation for Few-Shot Relation Extraction", journal = j-TALLIP, volume = "24", number = "10", pages = "114:1--114:13", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3765749", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The goal of continuous few-shot relation extraction is to enable the model to continuously learn new relation types under conditions with limited labeled training data while avoiding the forgetting of previously learned relations. The primary challenges \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "114", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Hashmi:2025:RLA, author = "Ehtesham Hashmi and Hasnain Ahmad and Muhammad Tayyab Mazhar and Sule Yildirim Yayilgan and Mehtab Afzal and Sarang Shaikh", title = "A Robust and Linguistically-Aware Hate Speech Detection System for {Roman Urdu}", journal = j-TALLIP, volume = "24", number = "10", pages = "115:1--115:22", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3768571", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Social media sites have developed into a common space for individuals to share their concerns and opinions. There is a chance for individuals and organizations to participate in online behavior that breaches accepted social norms because of the \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "115", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Lin:2025:DAR, author = "Luyang Lin and Jing Li and Kam-Fai Wong", title = "Data-Augmented and Retrieval-Augmented Context Enrichment in {Chinese} Media Bias Detection", journal = j-TALLIP, volume = "24", number = "10", pages = "116:1--116:24", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3765898", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Warning: This article contains content that may be offensive or controversial. 
With the increasing pursuit of objective reports, automatically understanding media bias has drawn more attention in recent research. However, most of previous work examines \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "116", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Guo:2025:MAG, author = "Hai Guo and Dawei Zhu and Jingying Zhao and Lingling Tong", title = "Multi-attention Ghost Pyramid Fusion Network for Script Identification of {Chinese} Ancient Document Images", journal = j-TALLIP, volume = "24", number = "10", pages = "117:1--117:16", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3748314", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Script identification is a key step in document analysis and recognition in multilingual environments. This study proposed a new dataset for script identification algorithms, containing images of ancient documents in 12 different ethnic scripts, including \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. 
Process.", articleno = "117", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Mishra:2025:CLI, author = "Santosh Kumar Mishra and Sriparna Saha and Pushpak Bhattacharyya", title = "Continual Learning for Image Captioning in {Hindi}", journal = j-TALLIP, volume = "24", number = "10", pages = "118:1--118:19", month = oct, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3749641", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Sat Oct 11 07:06:48 MDT 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "Continual learning, alternatively referred to as incremental learning or lifelong learning, represents a learning paradigm enabling an agent to acquire new information without compromising its retention of previously learned knowledge. Continual learning \ldots{}", acknowledgement = ack-nhfb, ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.", articleno = "118", fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)", journal-URL = "https://dl.acm.org/loi/tallip", } @Article{Ghosal:2025:PBD, author = "Sayani Ghosal and Aditya Bachhawat and Amita Jain and Devendra Tayal", title = "Political Bias Detection from {Hindi} News Using Neutrosophic Sets, {MuRil} and Extended {Hindi} {SentiWordNet}", journal = j-TALLIP, volume = "24", number = "11", pages = "119:1--119:26", month = nov, year = "2025", CODEN = "????", DOI = "https://doi.org/10.1145/3767316", ISSN = "2375-4699 (print), 2375-4702 (electronic)", ISSN-L = "2375-4699", bibdate = "Fri Nov 28 13:36:38 MST 2025", bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib", abstract = "The Media is stated as the ``fourth pillar of democracy'' that influences day to day life of each individual. 
The falling rank of India in the (WPFI) portrays biasness in news \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "119",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Yagnik:2025:MOB,
  author = "Mansi Yagnik and Mehreen Hashmi and Deepika Kumar and Khushi Jain and Ekagrah Grover and Jude D. Hemanth",
  title = "A Multi-Output {BERT} Framework for Abusive Comment Detection and Sentiment Analysis on Low-Resource Language",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "120:1--120:25",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3766889",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "In the modern digital world, social media has become essential for interpersonal interaction by promoting the interchange of ideas and points of view. But there are difficulties in this \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "120",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Cai:2025:CSC,
  author = "Ting Cai and Yu Xiong and Xinming Qin and Yu Yao",
  title = "Considering Student's Cognitive Abilities-Entity Level and Entirety Level Sentiment Analysis of Teaching Evaluation",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "121:1--121:19",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3766517",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Student Teaching Feedback Sentiment Analysis (STFSA) plays a crucial role in evaluating teaching effectiveness, and its analysis results are influenced by both teaching \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "121",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Wu:2025:AAD,
  author = "Jiayang Wu and Wensheng Gan and Jiahao Zhang and Philip Yu",
  title = "{ADKGD}: Anomaly Detection in Knowledge Graphs with Dual-Channel Training",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "122:1--122:29",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3748321",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "In the current development of large language models (LLMs), it is important to ensure the accuracy and reliability of the underlying data sources. LLMs are critical for various \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "122",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Inan:2025:CRM,
  author = "Emrah Inan",
  title = "Contrastive Retrieval Methodology for {Turkish} Metaphor Detection and Identification",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "123:1--123:21",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3770072",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Metaphorical expressions, as a form of figurative language, are individually limited in their use. However, when both literal and non-literal meanings are considered, they are \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "123",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Sung:2025:TRM,
  author = "Hakyung Sung and Gyu-Ho Shin",
  title = "Towards Robust Morphosyntactic Analysis of {L2} {Korean}: Evaluating and Fine-Tuning a {Korean} Language Model",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "124:1--124:21",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3767330",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Despite the growing use of NLP in second language (L2) research, model accuracy in L2 settings remains underexplored. This study addresses this gap by evaluating and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "124",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2025:EFC,
  author = "Xiao Liu and Shichang Zhu and Ying Li and Xin Chen and Zhengtao Yu",
  title = "Entity-focused {Chinese} Spelling Correction: Dataset and Approach",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "125:1--125:17",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3765761",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/spell.bib; https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Due to the strong representation capability of pre-trained language models, Chinese spelling correction models have significantly improved. However, pre-trained language models \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "125",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Sriwirote:2025:PEP,
  author = "Panyut Sriwirote and Attapol T.
Rutherford and Jalinee Thapiang and Vasan Timtong",
  title = "{PhayaThaiBERT}: Enhancing a Pretrained {Thai} Language Model with Unassimilated Loanwords",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "126:1--126:17",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3765962",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Although WangchanBERTa has become the de facto standard in transformer-based Thai language modeling, it still has shortcomings in regard to the understanding of foreign \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "126",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Tahir:2025:HHD,
  author = "Sidra Tahir and Asif Nawaz",
  title = "{HuEID}: Hybrid Deep Learning for Cyberbullying Detection using Multi-Modal {Urdu} Text and Emojis",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "127:1--127:19",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3769294",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "In the age of digital communication, social media platforms have become essential to our daily lives, providing unprecedented opportunities for interaction and information sharing. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "127",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Almujaiwel:2025:CCB,
  author = "Sultan Almujaiwel and Damith Premasiri and Tharindu Ranasinghe and Mo El-Haj and Ruslan Mitkov",
  title = "Complex Concept-Based Readability Estimation from {Arabic} Curriculum",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "128:1--128:21",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3770070",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "This article presents an approach to readability estimation that focuses on conceptual rather than linguistic complexity, using the extensive SaudiTextBooks textbooks. We \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "128",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Bayram:2025:HFT,
  author = "M. Ali Bayram and Banu Diri and Savas Yildirim",
  title = "Healthcare-Focused {Turkish} Medical {LLM}: Training on Real Patient--Doctor Question--Answer Data for Enhanced Medical Insight",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "129:1--129:13",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3772000",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "The development of a Turkish-specific Large Language Model (LLM) for healthcare presents a unique opportunity to enhance AI's accessibility and relevance for Turkish-speaking \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans.
Asian Low-Resour. Lang. Inf. Process.",
  articleno = "129",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Selamet:2025:GCS,
  author = "Dilara Toruno{\u{g}}lu Selamet and Ali {\c{S}}enta{\c{s}} and G{\"u}l{\c{s}}en Eryi{\u{g}}it",
  title = "Gamified Crowd-sourcing for Word Sense Disambiguation of {Turkish}",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "130:1--130:23",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3767160",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Word sense disambiguation (WSD) is the process of determining the correct meaning of a word based on its context in a sentence, a task that remains one of the core challenges in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "130",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Gormez:2025:DDL,
  author = "Yasin G{\"o}rmez and Halil Arslan and Mustafa Elyakan",
  title = "Developing Deep Learning Models for {Turkish} Automatic Punctuation Restoration Using a Novel Dataset",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "131:1--131:15",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3772087",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Today, automatic speech recognition systems are widely used by individuals, institutions, and organizations.
However, the lack of punctuation marks in the texts produced by these \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "131",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Rai:2025:MND,
  author = "Pooja Rai and Ayan Das and Sanjay Chatterji",
  title = "Mapping of the {Nepali} Dependency Treebank to Universal Dependencies",
  journal = j-TALLIP,
  volume = "24",
  number = "11",
  pages = "132:1--132:22",
  month = nov,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3749643",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Nov 28 13:36:38 MST 2025",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Universal Dependencies (UD) have garnered notable focus for the systematic assessment of cross-lingual methods in the task of dependency parsing. In this article, we present our \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "132",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Wang:2025:MIB,
  author = "Hao Wang and Tang Li and Siyuan Du and Xiao Wei",
  title = "Mixed Information Bottleneck for Location Metonymy Resolution Using Pre-trained Language Models",
  journal = j-TALLIP,
  volume = "24",
  number = "12",
  pages = "133:1--133:24",
  month = dec,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3774933",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:36:13 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Metonymy resolution (MR) is a crucial challenge in natural language understanding and information retrieval.
Recent large-scale pre-trained language models have shown promising results in various natural language processing (NLP) tasks, including MR. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "133",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Haouhat:2025:AAM,
  author = "Abdelhamid Haouhat and Slimane Bellaouar and Attia Nehar and Hadda Cherroun",
  title = "{Amd'SaEr}: {Arabic} Multimodal Dataset for Sentiment Analysis and Emotion Recognition",
  journal = j-TALLIP,
  volume = "24",
  number = "12",
  pages = "134:1--134:28",
  month = dec,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3774880",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:36:13 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Multimodal sentiment analysis and emotion recognition have attracted significant interest in multimodal learning. Naturally, humans express their feelings and emotions through nuanced expressions across various verbal and non-verbal modalities. Despite \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "134",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Tahery:2025:IEM,
  author = "Saedeh Tahery and Saeed Farzi",
  title = "An Invasive Embedding Model in Favor of Low-Resource Languages Understanding",
  journal = j-TALLIP,
  volume = "24",
  number = "12",
  pages = "135:1--135:24",
  month = dec,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3771926",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:36:13 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "The contextual representations generated by multilingual pre-trained language models involve semantic content that conveys the meaning of sentences, as well as language-dependent nuances that indicate language-specific information. However, for cross- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "135",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Kulkarni:2025:ECH,
  author = "Dhanashree Kulkarni and Anand Deshpande and Vania Estrela",
  title = "Emotion Classification for {Hindi} Text: a Hybrid Approach",
  journal = j-TALLIP,
  volume = "24",
  number = "12",
  pages = "136:1--136:13",
  month = dec,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3777372",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:36:13 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Emotion classification is a critical component in understanding human expression and interaction in textual data. However, achieving high accuracy in emotion classification, particularly in complex languages like Hindi, presents several challenges.
This \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "136",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Gupta:2025:AHT,
  author = "Saumay Gupta and Sukomal Pal",
  title = "Advancing {Hindi} Text Summarization: Named Entity Recognition and Content Augmentation Strategies",
  journal = j-TALLIP,
  volume = "24",
  number = "12",
  pages = "137:1--137:32",
  month = dec,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3770073",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:36:13 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "We explore advancements in Hindi text summarization, a critical area in natural language processing that aids in managing information overload. Despite a growing corpus of Hindi data, there's a significant gap in practical summarization tools due to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "137",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Zhang:2025:TMM,
  author = "Dongyu Zhang and Shengcheng Yin and Jingwei Yu and Zhiyao Wu and Zhen Li and Chengpei Xu and Xiaoxia Wang and Feng Xia",
  title = "Towards Multimodal Metaphor Understanding: a {Chinese} Dataset and Model for Metaphor Mapping Identification",
  journal = j-TALLIP,
  volume = "24",
  number = "12",
  pages = "138:1--138:25",
  month = dec,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3773989",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:36:13 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Metaphors play a crucial role in human communication, yet their comprehension remains a significant challenge for natural language processing (NLP) due to the cognitive complexity involved. According to Conceptual Metaphor Theory (CMT), metaphors map a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "138",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Liu:2025:FFG,
  author = "Yan Liu and Renren Jin and Ling Shi and Zheng Yao and Deyi Xiong",
  title = "{FineMath}: a Fine-Grained Mathematical Evaluation Benchmark for {Chinese} Large Language Models",
  journal = j-TALLIP,
  volume = "24",
  number = "12",
  pages = "139:1--139:15",
  month = dec,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3773083",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:36:13 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "To thoroughly assess the mathematical reasoning abilities of Large Language Models (LLMs), we need to carefully curate evaluation datasets covering diverse mathematical concepts and mathematical problems at different difficulty levels. In pursuit of this \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "139",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Baranwal:2025:EDC,
  author = "Pushkar Baranwal and Amit Pundir and Sanjeev Singh and Geetika Saxena",
  title = "Embedding-Driven Clustering for Unerring Content Categorization in Low-Resource {Hindi} Language",
  journal = j-TALLIP,
  volume = "24",
  number = "12",
  pages = "140:1--140:33",
  month = dec,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3776554",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:36:13 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Content generation has been happening at a very high rate in many languages including the low resource local languages.
It is crucial that this content is classified accurately to reach the right target audience within the shortest time possible. ML and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "140",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Sha:2025:TLL,
  author = "Jiu Sha and Mengxiao Zhu and Chong Feng and Jizhuoma Ci",
  title = "{Tibetan-LLaMA 2}: Large Language Model for {Tibetan}",
  journal = j-TALLIP,
  volume = "24",
  number = "12",
  pages = "141:1--141:19",
  month = dec,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3776748",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:36:13 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Large language models (LLMs), such as ChatGPT and LLama, have shown remarkable capability in a wide range of natural language tasks. However, the current LLMs are mainly concentrated in resource-rich languages, such as English and Chinese. For low- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "141",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Ren:2025:OOC,
  author = "Bo Ren and Haolan Wang and Linqin Cai and Zhiwei Cai",
  title = "{OBE} Oriented Constructing Method of Educational Knowledge Graph for Automatic Control Discipline",
  journal = j-TALLIP,
  volume = "24",
  number = "12",
  pages = "142:1--142:23",
  month = dec,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3772056",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:36:13 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "With the rapid development of education informatization and the exponential growth of education big data, Educational Knowledge Graph (EKG) has received significant attention in the field of smart education. However, current research of constructing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "142",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Mohammadi:2025:MIC,
  author = "Hassan Haji Mohammadi and Alireza Talebpour and Ahmad Mahmoudi-Aznaveh and Samaneh Yazdani",
  title = "Morphosyntactically-Informed Coreference Resolution for {Persian} with Adaptive Pruning and Global Context Aggregation: Integrating Adaptive Pruning and Global Context for Improved Reference Linking in {Persian}",
  journal = j-TALLIP,
  volume = "24",
  number = "12",
  pages = "143:1--143:33",
  month = dec,
  year = "2025",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3772089",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:36:13 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Coreference resolution in Persian, a task critical to natural language understanding, presents unique challenges due to the language's pro-drop tendencies, flexible word order, and rich morphosyntactic agreement system. This study introduces the first end- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "143",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Nelatoori:2026:DHE,
  author = "Kiran Babu Nelatoori and Ashish Kumar Sahagal and Hima Bindu Kommanti",
  title = "Deromanization of {Hindi--English} Code-Mixed Text and its Influence on Toxic Comment Classification and Sentiment Analysis",
  journal = j-TALLIP,
  volume = "25",
  number = "1",
  pages = "1:1--1:38",
  month = jan,
  year = "2026",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3777546",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:41:01 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Code-mixing has become increasingly prevalent in multilingual societies, primarily due to the widespread use of social media platforms. However, processing code-mixed text presents significant challenges, mainly stemming from non-standardized spellings \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "1",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Sharma:2026:CRT,
  author = "Himanshu Sharma and Devanand Padha and Yashwant Singh",
  title = "{ChitraVivran}: Real-Time Attention-Based {Hindi} Image Captioning with Boosted Contextual Descriptors",
  journal = j-TALLIP,
  volume = "25",
  number = "1",
  pages = "2:1--2:32",
  month = jan,
  year = "2026",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3779214",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:41:01 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Image captioning is the ability to generate concise natural language descriptions of given images.
It integrates computer vision and natural language processing, two cutting-edge artificial intelligence disciplines. Image captioning is nowadays widely \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "2",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Raj:2026:HMD,
  author = "Mohit Raj and Ritesh Kumar",
  title = "How Much Data in Low-resource {Indian} Languages is {``Sufficient''} for Transfer Learning: a Comparative Study for {POS} Annotation",
  journal = j-TALLIP,
  volume = "25",
  number = "1",
  pages = "3:1--3:26",
  month = jan,
  year = "2026",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3783981",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:41:01 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Recent advances in machine learning and deep learning have demonstrated the applicability and utility of cross-lingual, transfer learning methods in low and zero-resource scenarios. We explore the applicability of transfer learning methods from pre- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "3",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Sarihan:2026:CPF,
  author = "Tansel Sarihan and Cem Erg{\"u}n",
  title = "{CrammedBERTurk}: Pretraining\slash Finetuning a New Language Model for {Turkish} Question Answering on Limited Budget",
  journal = j-TALLIP,
  volume = "25",
  number = "1",
  pages = "4:1--4:23",
  month = jan,
  year = "2026",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3780096",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:41:01 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "A comprehensive evaluation of transformer-based models for Turkish Question Answering (QA) is conducted, introducing the novel pretraining and fine-tuning of CrammedBERTurk for the first time in this domain. The CrammedBERTurk model was pretrained on a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf.
Process.",
  articleno = "4",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Naaz:2026:CDA,
  author = "Komal Naaz and Niraj Kumar Singh",
  title = "Commonality and Diversity Analysis for Style Determination: Resource Creation and System Development",
  journal = j-TALLIP,
  volume = "25",
  number = "1",
  pages = "5:1--5:28",
  month = jan,
  year = "2026",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3786585",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:41:01 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Stylometry has gained momentum within digital humanities, yet studies focusing on Hindi poetry remain scarce, especially in metrical compositions like doha. This article presents a computational and statistical approach to analyzing stylistic commonality \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "5",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Cheng:2026:MNM,
  author = "Cheng Cheng and Xingxing Cai and Hengrui Qi and Wenyun Chen and Yong Zhang",
  title = "{MSDA-Net}: Multi-source Domain Adaptive Network for Multi-modal Emotion Recognition",
  journal = j-TALLIP,
  volume = "25",
  number = "1",
  pages = "6:1--6:22",
  month = jan,
  year = "2026",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3786588",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:41:01 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Electroencephalogram (EEG) has shown great potential in multi-modal emotion recognition (MER) due to its ability to directly capture emotional states.
However, the nonstationarity of EEG signals leads to significant variations across subjects and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "6",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}

@Article{Priya:2026:IOH,
  author = "Kumari Priya and Shivam Anand and Chandranath Adak",
  title = "Inspecting Offline Handwritten Signature Intra-Variation Over Time: an Empirical Study",
  journal = j-TALLIP,
  volume = "25",
  number = "1",
  pages = "7:1--7:15",
  month = jan,
  year = "2026",
  CODEN = "????",
  DOI = "https://doi.org/10.1145/3786589",
  ISSN = "2375-4699 (print), 2375-4702 (electronic)",
  ISSN-L = "2375-4699",
  bibdate = "Fri Jan 30 09:41:01 MST 2026",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/tallip.bib",
  abstract = "Handwritten signatures serve as crucial personal identifiers and have been extensively used for authentication purposes for a long time in the human race. Signatures exhibit substantial variations influenced by factors such as mood, time, writing speed, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal = "ACM Trans. Asian Low-Resour. Lang. Inf. Process.",
  articleno = "7",
  fjournal = "ACM Transactions on Asian and Low-Resource Language Information Processing (TALLIP)",
  journal-URL = "https://dl.acm.org/loi/tallip",
}