LDA 2018

1. Improvement Comments

In [259]:
# Build model
mod18, cp18, id2word18, data_lemm18 = get_topics(df18_nw)

# Compute Coherence Score
coherence_model_lda18 = CoherenceModel(model=mod18, texts=data_lemm18, dictionary=id2word18, coherence='c_v')
print('\nCoherence Score: ', coherence_model_lda18.get_coherence())

# Compute Perplexity
print('\nPerplexity: ', mod18.log_perplexity(cp18))  # per-word likelihood bound; perplexity = 2**(-bound), and lower perplexity means a better fit
Coherence Score:  0.4244802583339723

Perplexity:  -9.116064523315547
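
The get_topics helper is defined earlier in the notebook. For reference, below is a minimal sketch of what such a gensim pipeline typically looks like; the default column name 'improvement', num_topics=10, and the omission of the full lemmatization/stopword steps are assumptions, not the actual implementation.

# Sketch of a get_topics-style helper (assumed defaults; the real helper
# also lemmatizes and removes stopwords, omitted here for brevity)
import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess

def get_topics_sketch(df, textcol='improvement', num_topics=10):
    # Tokenize and lowercase each response
    data_lemm = [simple_preprocess(str(doc)) for doc in df[textcol]]
    # Map each unique token to an integer id
    id2word = corpora.Dictionary(data_lemm)
    # Bag-of-words corpus: (token_id, count) pairs per document
    corpus = [id2word.doc2bow(text) for text in data_lemm]
    # Fit the LDA model
    model = gensim.models.LdaModel(corpus=corpus, id2word=id2word,
                                   num_topics=num_topics, random_state=42)
    return model, corpus, id2word, data_lemm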
In [249]:
# Visualize the topics
pyLDAvis.enable_notebook()
pyLDAvis.gensim.prepare(mod18, cp18, id2word18, mds='mmds')
Out[249]:
[interactive pyLDAvis topic map; not rendered in this static export]
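
The prepared figure is an interactive widget and does not survive a static export. If a shareable copy is needed, pyLDAvis can write it to a standalone HTML file (the filename below is illustrative):

# Persist the interactive topic map as standalone HTML
vis18 = pyLDAvis.gensim.prepare(mod18, cp18, id2word18, mds='mmds')
pyLDAvis.save_html(vis18, 'lda18_improvement.html')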

2. All Text Responses

In [ ]:
mod18all, cp18all, id2word18all, data_lemm18all = get_topics(df18_nw, textcol='all_text')
In [276]:
# Compute Coherence Score
coherence_model_lda18all = CoherenceModel(model=mod18all, texts=data_lemm18all, dictionary=id2word18all, coherence='c_v')
print('\nCoherence Score: ', coherence_model_lda18all.get_coherence())

# Compute Perplexity
print('\nPerplexity: ', mod18all.log_perplexity(cp18all))  # per-word likelihood bound; perplexity = 2**(-bound), and lower perplexity means a better fit
Coherence Score:  0.3922923992327375

Perplexity:  -11.749656692843635
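
The all-text model is less coherent than the improvement-comments model (0.392 vs. 0.424). One way to check whether a different topic count would help is to sweep num_topics and score each fit. The sketch below refits plain LdaModel instances on the corpus built above; the parameters are illustrative, not necessarily those used inside get_topics.

# Sweep the topic count and score each fit with c_v coherence
from gensim.models import LdaModel

for k in (5, 10, 15, 20):
    lda_k = LdaModel(corpus=cp18all, id2word=id2word18all,
                     num_topics=k, random_state=42)
    cm = CoherenceModel(model=lda_k, texts=data_lemm18all,
                        dictionary=id2word18all, coherence='c_v')
    print(f'num_topics={k}: c_v coherence = {cm.get_coherence():.4f}')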
In [277]:
# Visualize the topics
pyLDAvis.enable_notebook()
pyLDAvis.gensim.prepare(mod18all, cp18all, id2word18all, mds='mmds')
Out[277]:
[interactive pyLDAvis topic map; not rendered in this static export]

LDA 2019

1. Improvement Comments

In [261]:
# Build model
mod19, cp19, id2word19, data_lemm19 = get_topics(df19_nw)

# Compute Coherence Score
coherence_model_lda19 = CoherenceModel(model=mod19, texts=data_lemm19, dictionary=id2word19, coherence='c_v')
print('\nCoherence Score: ', coherence_model_lda19.get_coherence())

# Compute Perplexity
print('\nPerplexity: ', mod19.log_perplexity(cp19))  # per-word likelihood bound; perplexity = 2**(-bound), and lower perplexity means a better fit
Coherence Score:  0.4334418066202172

Perplexity:  -9.656356397428038
In [262]:
# Visualize the topics
pyLDAvis.enable_notebook()
pyLDAvis.gensim.prepare(mod19, cp19, id2word19, mds='mmds')
Out[262]:
[interactive pyLDAvis topic map; not rendered in this static export]
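
With both years fitted, the topic-word distributions can be compared directly. A simple side-by-side dump of the top words per topic (num_words=8 is an arbitrary choice):

# Print each topic's top words for both years to eyeball theme shifts
for label, model in (('2018', mod18), ('2019', mod19)):
    print(f'--- {label} improvement comments ---')
    for topic_id, words in model.print_topics(num_words=8):
        print(f'Topic {topic_id}: {words}')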