% Conference paper by Tu, Xiong, Chen and Brinton in the ICDMW 2018 workshop
% proceedings (pages 320--327, DOI 10.1109/ICDMW.2018.00053).
% The opaque hexadecimal citation key is kept unchanged so that existing
% \cite commands continue to resolve.
% Acronyms in booktitle/series are brace-protected so no style can recase them.
% NOTE(review): "address" currently holds a country rather than the
% publisher's city -- confirm the city and replace the value.
% NOTE(review): "day" is not a standard field and is ignored by classic
% styles; it is retained only to preserve the exported publication date.
@inproceedings{7134ba11eace4708b1d005feb78d1846,
  author    = {Tu, Yuwei and Xiong, Ying and Chen, Weiyu and Brinton, Christopher},
  title     = {A domain-independent text segmentation method for educational course content},
  booktitle = {Proceedings - 18th {IEEE} International Conference on Data Mining Workshops, {ICDMW} 2018},
  editor    = {Tong, Hanghang and Zhu, Feida and Yu, Jeffrey and Li, Zhenhui},
  series    = {{IEEE} International Conference on Data Mining Workshops, {ICDMW}},
  publisher = {IEEE Computer Society},
  address   = {United States},
  pages     = {320--327},
  year      = {2019},
  month     = feb,
  day       = {7},
  doi       = {10.1109/ICDMW.2018.00053},
  language  = {English (US)},
  keywords  = {Latent Dirichlet Allocation, Semantic Information, Text Segmentation, Topic Modeling, Word Embedding},
  note      = {18th IEEE International Conference on Data Mining Workshops, ICDMW 2018 ; Conference date: 17-11-2018 Through 20-11-2018},
  abstract  = {In this study, we have proposed a domain-independent text segmentation algorithm which is particularly useful in online educational courses. Text segmentation is proven to be helpful in improving the readability of large corpora of documents, which is essential in education scenarios. While existing domain-dependent text segmentation methods have much better performance than domain-independent methods in most cases, only domain-independent methods are applicable to sparse training content in education scenarios. Our method, unlike other domain-dependent text segmentation methods, doesn't require heavy training on prior documents, but only need to train on the current corpus of documents with topic distributions and word vector representations. Our proposed method develops text boundaries between small text units in three steps. We first calculate input text features via topical distributions (latent Dirichlet allocation) and word embeddings (GloVe). We then calculate similarity values between such textual features and detect distribution changes between the similarities. We finally perform clustering on the similarities and detect sub-topic boundaries via cluster differences. We test our method on two datasets, one from an online education course and one from a popular public dataset---Choi Dataset. The results demonstrate that our method outperforms other state-of-the-art domain-independent text segmentation approaches while achieving performance comparable to a few domain-dependent algorithms.},
}