LDA 2018

1. Improvement Comments

In [259]:
# Build model
mod18, cp18, id2word18, data_lemm18 = get_topics(df18_nw)

# Compute Coherence Score
coherence_model_lda18 = CoherenceModel(model=mod18, texts=data_lemm18, dictionary=id2word18, coherence='c_v')
print('\nCoherence Score: ', coherence_model_lda18.get_coherence())

# Compute Perplexity
print('\nPerplexity: ', mod18.log_perplexity(cp18))  # per-word likelihood bound; perplexity = 2**(-bound), and lower perplexity means a better fit
Coherence Score:  0.4244802583339723

Perplexity:  -9.116064523315547
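
The get_topics helper is defined earlier in the notebook. For reference, below is a minimal sketch of what such a gensim pipeline typically looks like; the default column name 'improvement', num_topics=10, and the omission of the full lemmatization/stopword steps are assumptions, not the actual implementation.

# Sketch of a get_topics-style helper (assumed defaults; the real helper
# also lemmatizes and removes stopwords, omitted here for brevity)
import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess

def get_topics_sketch(df, textcol='improvement', num_topics=10):
    # Tokenize and lowercase each response
    data_lemm = [simple_preprocess(str(doc)) for doc in df[textcol]]
    # Map each unique token to an integer id
    id2word = corpora.Dictionary(data_lemm)
    # Bag-of-words corpus: (token_id, count) pairs per document
    corpus = [id2word.doc2bow(text) for text in data_lemm]
    # Fit the LDA model
    model = gensim.models.LdaModel(corpus=corpus, id2word=id2word,
                                   num_topics=num_topics, random_state=42)
    return model, corpus, id2word, data_lemm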
In [249]:
# Visualize the topics
pyLDAvis.enable_notebook()
pyLDAvis.gensim.prepare(mod18, cp18, id2word18, mds='mmds')
Out[249]:
[interactive pyLDAvis topic map; not rendered in this static export]
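
The prepared figure is an interactive widget and does not survive a static export. If a shareable copy is needed, pyLDAvis can write it to a standalone HTML file (the filename below is illustrative):

# Persist the interactive topic map as standalone HTML
vis18 = pyLDAvis.gensim.prepare(mod18, cp18, id2word18, mds='mmds')
pyLDAvis.save_html(vis18, 'lda18_improvement.html')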

2. All Text Responses

In [ ]:
mod18all, cp18all, id2word18all, data_lemm18all = get_topics(df18_nw, textcol='all_text')
In [276]:
# Compute Coherence Score
coherence_model_lda18all = CoherenceModel(model=mod18all, texts=data_lemm18all, dictionary=id2word18all, coherence='c_v')
print('\nCoherence Score: ', coherence_model_lda18all.get_coherence())

# Compute Perplexity
print('\nPerplexity: ', mod18all.log_perplexity(cp18all))  # per-word likelihood bound; perplexity = 2**(-bound), and lower perplexity means a better fit
Coherence Score:  0.3922923992327375

Perplexity:  -11.749656692843635
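
The all-text model is less coherent than the improvement-comments model (0.392 vs. 0.424). One way to check whether a different topic count would help is to sweep num_topics and score each fit. The sketch below refits plain LdaModel instances on the corpus built above; the parameters are illustrative, not necessarily those used inside get_topics.

# Sweep the topic count and score each fit with c_v coherence
from gensim.models import LdaModel

for k in (5, 10, 15, 20):
    lda_k = LdaModel(corpus=cp18all, id2word=id2word18all,
                     num_topics=k, random_state=42)
    cm = CoherenceModel(model=lda_k, texts=data_lemm18all,
                        dictionary=id2word18all, coherence='c_v')
    print(f'num_topics={k}: c_v coherence = {cm.get_coherence():.4f}')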
In [277]:
# Visualize the topics
pyLDAvis.enable_notebook()
pyLDAvis.gensim.prepare(mod18all, cp18all, id2word18all, mds='mmds')
Out[277]:
[interactive pyLDAvis topic map; not rendered in this static export]

LDA 2019

1. Improvement Comments

In [261]:
# Build model
mod19, cp19, id2word19, data_lemm19 = get_topics(df19_nw)

# Compute Coherence Score
coherence_model_lda19 = CoherenceModel(model=mod19, texts=data_lemm19, dictionary=id2word19, coherence='c_v')
print('\nCoherence Score: ', coherence_model_lda19.get_coherence())

# Compute Perplexity
print('\nPerplexity: ', mod19.log_perplexity(cp19))  # per-word likelihood bound; perplexity = 2**(-bound), and lower perplexity means a better fit
Coherence Score:  0.4334418066202172

Perplexity:  -9.656356397428038
In [262]:
# Visualize the topics
pyLDAvis.enable_notebook()
pyLDAvis.gensim.prepare(mod19, cp19, id2word19, mds='mmds')
Out[262]:
[interactive pyLDAvis topic map; not rendered in this static export]
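
With both years fitted, the topic-word distributions can be compared directly. A simple side-by-side dump of the top words per topic (num_words=8 is an arbitrary choice):

# Print each topic's top words for both years to eyeball theme shifts
for label, model in (('2018', mod18), ('2019', mod19)):
    print(f'--- {label} improvement comments ---')
    for topic_id, words in model.print_topics(num_words=8):
        print(f'Topic {topic_id}: {words}')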