In [1]:
# 必要なライブラリをインポート
import pandas as pd
import re
from itertools import combinations
from collections import Counter
from pyvis.network import Network
import spacy
# spaCyモデルのロード
nlp = spacy.load("en_core_web_sm")
# 動詞だけを抽出する関数
def extract_verbs(text):
doc = nlp(text)
return [token.text for token in doc if token.pos_ == "VERB"]
# 形容詞だけを抽出する関数
def extract_adjectives(text):
doc = nlp(text)
return [token.text for token in doc if token.pos_ == "ADJ"]
# IMDbデータセットを読み込む
data = pd.read_csv("IMDB Dataset.csv")
data.head()
Out[1]:
| review | sentiment | |
|---|---|---|
| 0 | One of the other reviewers has mentioned that ... | positive |
| 1 | A wonderful little production. <br /><br />The... | positive |
| 2 | I thought this was a wonderful way to spend ti... | positive |
| 3 | Basically there's a family where a little boy ... | negative |
| 4 | Petter Mattei's "Love in the Time of Money" is... | positive |
In [2]:
# ポジティブレビューとネガティブレビューのサンプリング
review_positive = data[data['sentiment'] == 'positive'].sample(n=100, random_state=50)
review_negative = data[data['sentiment'] == 'negative'].sample(n=100, random_state=50)
In [3]:
# ポジティブレビューとネガティブレビューの動詞抽出
#review_positive['tokens'] = review_positive['review'].apply(extract_verbs)
#review_negative['tokens'] = review_negative['review'].apply(extract_verbs)
# ポジティブレビューとネガティブレビューの形容詞抽出
review_positive['tokens'] = review_positive['review'].apply(extract_adjectives)
review_negative['tokens'] = review_negative['review'].apply(extract_adjectives)
In [4]:
# 共起ペア生成関数(順序を無視してカウント)
def generate_cooccurrence(tokens_list):
"""
トークンリストから順序を無視した共起ペアを生成し、頻度をカウント
"""
pairs = []
for tokens in tokens_list:
# 自己ペアを排除し、ソートしたペアを生成
pairs.extend(tuple(sorted((a, b))) for a, b in combinations(tokens, 2) if a != b)
return Counter(pairs)
In [5]:
# ポジティブレビューの共起ペア生成
cooccurrence_positive = generate_cooccurrence(review_positive['tokens'])
cooccurrence_positive
Out[5]:
Counter({('good', 'great'): 38,
('good', 'little'): 30,
('good', 'other'): 29,
('little', 'other'): 23,
('great', 'other'): 23,
('best', 'young'): 23,
('good', 'many'): 22,
('best', 'great'): 22,
('best', 'old'): 22,
('little', 'more'): 20,
('best', 'same'): 20,
('old', 'young'): 20,
('best', 'many'): 19,
('other', 'same'): 19,
('good', 'own'): 18,
('first', 'other'): 18,
('many', 'other'): 17,
('many', 'most'): 16,
('few', 'other'): 16,
('many', 'young'): 16,
('big', 'silent'): 16,
('better', 'good'): 15,
('little', 'own'): 15,
('many', 'same'): 15,
('best', 'new'): 15,
('best', 'other'): 15,
('good', 'nice'): 15,
('good', 'more'): 15,
('good', 'low'): 15,
('funny', 'good'): 15,
('many', 'southern'): 15,
('most', 'southern'): 15,
('real', 'silent'): 15,
('short', 'silent'): 15,
('little', 'many'): 14,
('little', 'most'): 14,
('best', 'little'): 14,
('great', 'new'): 14,
('few', 'good'): 14,
('first', 'good'): 14,
('little', 'such'): 14,
('big', 'great'): 14,
('most', 'other'): 14,
('good', 'such'): 14,
('best', 'good'): 14,
('first', 'little'): 14,
('much', 'other'): 13,
('little', 'same'): 13,
('new', 'same'): 13,
('few', 'great'): 13,
('brilliant', 'other'): 13,
('great', 'many'): 13,
('more', 'other'): 13,
('beautiful', 'great'): 13,
('little', 'low'): 13,
('older', 'younger'): 12,
('great', 'old'): 12,
('new', 'other'): 12,
('good', 'only'): 12,
('great', 'little'): 12,
('good', 'most'): 12,
('full', 'other'): 12,
('other', 'strong'): 12,
('great', 'same'): 12,
('great', 'real'): 12,
('good', 'much'): 11,
('little', 'much'): 11,
('last', 'other'): 11,
('many', 'more'): 11,
('-', 'many'): 11,
('fantastic', 'same'): 11,
('many', 'own'): 11,
('own', 'same'): 11,
('great', 'silent'): 11,
('most', 'young'): 11,
('big', 'little'): 11,
('good', 'real'): 11,
('funny', 'own'): 11,
('great', 'perfect'): 11,
('good', 'same'): 11,
('many', 'old'): 11,
('first', 'same'): 11,
('better', 'other'): 10,
('little', 'real'): 10,
('many', 'new'): 10,
('best', 'most'): 10,
('great', 'own'): 10,
('other', 'own'): 10,
('good', 'several'): 10,
('great', 'strong'): 10,
('little', 'true'): 10,
('few', 'many'): 10,
('only', 'other'): 10,
('great', 'most'): 10,
('other', 'real'): 10,
('funny', 'great'): 10,
('better', 'great'): 10,
('best', 'strong'): 10,
('big', 'short'): 10,
('big', 'real'): 10,
('entertaining', 'good'): 10,
('original', 'other'): 10,
('intelligent', 'southern'): 10,
('other', 'southern'): 10,
('southern', 'young'): 10,
('southern', 'strong'): 10,
('silent', 'sound'): 10,
('silent', 'such'): 10,
('greatest', 'silent'): 10,
('last', 'little'): 9,
('many', 'much'): 9,
('dead', 'own'): 9,
('purgatory', 'same'): 9,
('new', 'purgatory'): 9,
('impossible', 'purgatory'): 9,
('impossible', 'same'): 9,
('best', 'own'): 9,
('impossible', 'new'): 9,
('new', 'real'): 9,
('best', 'real'): 9,
('old', 'own'): 9,
('good', 'old'): 9,
('other', 'young'): 9,
('great', 'such'): 9,
('good', 'least'): 9,
('first', 'full'): 9,
('great', 'only'): 9,
('great', 'long'): 9,
('enough', 'great'): 9,
('other', 'realistic'): 9,
('old', 'same'): 9,
('full', 'little'): 9,
('few', 'same'): 9,
('great', 'original'): 9,
('real', 'short'): 9,
('good', 'last'): 8,
('least', 'more'): 8,
('more', 'same'): 8,
('more', 'own'): 8,
('more', 'new'): 8,
('European', 'little'): 8,
('much', 'same'): 8,
('most', 'same'): 8,
('real', 'same'): 8,
('new', 'own'): 8,
('American', 'young'): 8,
('few', 'first'): 8,
('good', 'young'): 8,
('many', 'strong'): 8,
('few', 'little'): 8,
('many', 'true'): 8,
('enough', 'few'): 8,
('little', 'main'): 8,
('big', 'such'): 8,
('big', 'most'): 8,
('bad', 'good'): 8,
('different', 'good'): 8,
('funny', 'little'): 8,
('same', 'young'): 8,
('brilliant', 'same'): 8,
('best', 'big'): 8,
('great', 'wonderful'): 8,
('best', 'better'): 8,
('new', 'strong'): 8,
('amazing', 'other'): 8,
('best', 'long'): 8,
('beautiful', 'best'): 8,
('best', 'dark'): 8,
('amazing', 'great'): 8,
('good', 'high'): 8,
('innocent', 'little'): 8,
('amusing', 'good'): 8,
('long', 'most'): 8,
('most', 'old'): 8,
('bad', 'better'): 8,
('short', 'such'): 8,
('better', 'much'): 7,
('more', 'much'): 7,
('best', 'more'): 7,
('-', 'little'): 7,
('close', 'little'): 7,
('dead', 'little'): 7,
('least', 'same'): 7,
('much', 'own'): 7,
('much', 'new'): 7,
('most', 'own'): 7,
('best', 'clean'): 7,
('American', 'old'): 7,
('old', 'other'): 7,
('own', 'young'): 7,
('first', 'own'): 7,
('important', 'most'): 7,
('good', 'memorable'): 7,
('few', 'only'): 7,
('little', 'sure'): 7,
('little', 'only'): 7,
('other', 'sure'): 7,
('many', 'such'): 7,
('bad', 'many'): 7,
('American', 'many'): 7,
('big', 'good'): 7,
('bad', 'most'): 7,
('few', 'most'): 7,
('different', 'other'): 7,
('great', 'short'): 7,
('better', 'many'): 7,
('other', 'various'): 7,
('best', 'true'): 7,
('same', 'various'): 7,
('same', 'true'): 7,
('great', 'true'): 7,
('charming', 'little'): 7,
('little', 'short'): 7,
('nice', 'short'): 7,
('low', 'other'): 7,
('good', 'interesting'): 7,
('good', 'true'): 7,
('better', 'entertaining'): 7,
('great', 'realistic'): 7,
('hot', 'nice'): 7,
('long', 'old'): 7,
('beautiful', 'many'): 7,
('fantastic', 'other'): 7,
('good', 'wild'): 7,
('effective', 'good'): 7,
('original', 'same'): 7,
('fantastic', 'original'): 7,
('full', 'same'): 7,
('high', 'other'): 7,
('good', 'special'): 7,
('most', 'strong'): 7,
('most', 'short'): 7,
('best', 'comic'): 7,
('comic', 'little'): 7,
('many', 'serious'): 7,
('new', 'realistic'): 7,
('anti', 'little'): 7,
('amazing', 'realistic'): 7,
('last', 'much'): 6,
('classic', 'much'): 6,
('classic', 'good'): 6,
('better', 'little'): 6,
('-', 'more'): 6,
('close', 'more'): 6,
('entertaining', 'many'): 6,
('little', 'new'): 6,
('European', 'many'): 6,
('dead', 'many'): 6,
('many', 'purgatory'): 6,
('impossible', 'many'): 6,
('best', 'close'): 6,
('dead', 'purgatory'): 6,
('dead', 'same'): 6,
('dead', 'new'): 6,
('dead', 'impossible'): 6,
('own', 'purgatory'): 6,
('best', 'purgatory'): 6,
('general', 'same'): 6,
('general', 'own'): 6,
('impossible', 'own'): 6,
('best', 'general'): 6,
('best', 'impossible'): 6,
('old', 'silent'): 6,
('good', 'new'): 6,
('good', 'pretty'): 6,
('brilliant', 'many'): 6,
('amazing', 'many'): 6,
('able', 'many'): 6,
('many', 'non'): 6,
('many', 'sure'): 6,
('many', 'sensitive'): 6,
('many', 'only'): 6,
('other', 'true'): 6,
('brilliant', 'little'): 6,
('famous', 'little'): 6,
('important', 'many'): 6,
('big', 'famous'): 6,
('important', 'other'): 6,
('bad', 'other'): 6,
('able', 'famous'): 6,
('own', 'such'): 6,
('classic', 'famous'): 6,
('bad', 'great'): 6,
('famous', 'much'): 6,
('great', 'local'): 6,
('great', 'last'): 6,
('American', 'most'): 6,
('little', 'young'): 6,
('funny', 'more'): 6,
('many', 'subtle'): 6,
('corrupt', 'ordinary'): 6,
('Estonian', 'ordinary'): 6,
('better', 'first'): 6,
('entire', 'real'): 6,
('first', 'real'): 6,
('entire', 'good'): 6,
('great', 'more'): 6,
('better', 'same'): 6,
('better', 'new'): 6,
('beautiful', 'strong'): 6,
('best', 'emotional'): 6,
('beautiful', 'true'): 6,
('new', 'true'): 6,
('easy', 'good'): 6,
('only', 'own'): 6,
('most', 'only'): 6,
('more', 'such'): 6,
('high', 'own'): 6,
('low', 'own'): 6,
('funny', 'such'): 6,
('good', 'innocent'): 6,
('good', 'wonderful'): 6,
('dramatic', 'good'): 6,
('new', 'original'): 6,
('great', 'worse'): 6,
('beautiful', 'good'): 6,
('long', 'many'): 6,
('big', 'right'): 6,
('only', 'strong'): 6,
('first', 'more'): 6,
('brilliant', 'original'): 6,
('high', 'same'): 6,
('intelligent', 'many'): 6,
('intelligent', 'most'): 6,
('realistic', 'strong'): 6,
('American', 'best'): 6,
('funny', 'perfect'): 6,
('good', 'perfect'): 6,
('only', 'real'): 6,
('greatest', 'old'): 6,
('best', 'greatest'): 6,
('comic', 'good'): 6,
('complex', 'little'): 6,
('average', 'good'): 6,
('good', 'ready'): 6,
('little', 'realistic'): 6,
('difficult', 'little'): 6,
('big', 'sound'): 6,
('big', 'greatest'): 6,
('silent', 'special'): 6,
('real', 'sound'): 6,
('short', 'sound'): 6,
('real', 'such'): 6,
('greatest', 'real'): 6,
('greatest', 'short'): 6,
('half', 'little'): 6,
('comic', 'nice'): 6,
('married', 'nice'): 6,
('desperate', 'new'): 6,
('dated', 'new'): 6,
('better', 'cheap'): 6,
('much', 'older'): 5,
('last', 'older'): 5,
('last', 'younger'): 5,
('classic', 'little'): 5,
('dead', 'more'): 5,
('more', 'most'): 5,
('more', 'real'): 5,
('more', 'ready'): 5,
('entertaining', 'same'): 5,
('-', 'same'): 5,
('general', 'modern'): 5,
('initial', 'little'): 5,
('little', 'sexual'): 5,
('little', 'ready'): 5,
('least', 'many'): 5,
('least', 'own'): 5,
('many', 'religious'): 5,
('fantastic', 'own'): 5,
('European', 'same'): 5,
('close', 'many'): 5,
('many', 'single'): 5,
('many', 'ready'): 5,
('close', 'same'): 5,
('mere', 'own'): 5,
('clean', 'same'): 5,
('same', 'single'): 5,
('general', 'new'): 5,
('general', 'old'): 5,
('few', 'old'): 5,
('American', 'great'): 5,
('American', 'other'): 5,
('American', 'good'): 5,
('great', 'young'): 5,
('first', 'great'): 5,
('general', 'other'): 5,
('general', 'young'): 5,
('other', 'worth'): 5,
('important', 'young'): 5,
('great', 'similar'): 5,
('able', 'good'): 5,
('alive', 'good'): 5,
('able', 'few'): 5,
('clear', 'little'): 5,
('little', 'sweet'): 5,
('historical', 'many'): 5,
('few', 'much'): 5,
('European', 'other'): 5,
('more', 'only'): 5,
('other', 'physical'): 5,
('bad', 'little'): 5,
('American', 'little'): 5,
('big', 'own'): 5,
('bad', 'big'): 5,
('main', 'other'): 5,
('able', 'great'): 5,
('most', 'such'): 5,
('brilliant', 'much'): 5,
('brilliant', 'last'): 5,
('bad', 'much'): 5,
('good', 'main'): 5,
('female', 'first'): 5,
('different', 'strong'): 5,
('little', 'next'): 5,
('little', 'obvious'): 5,
('incredible', 'same'): 5,
('brilliant', 'more'): 5,
('favorite', 'funny'): 5,
('funny', 'same'): 5,
('same', 'sure'): 5,
('dirty', 'young'): 5,
('best', 'dirty'): 5,
('different', 'most'): 5,
('interesting', 'short'): 5,
('better', 'entire'): 5,
('better', 'strong'): 5,
('important', 'strong'): 5,
('difficult', 'real'): 5,
('real', 'strong'): 5,
('many', 'simple'): 5,
('other', 'wonderful'): 5,
('funny', 'other'): 5,
('best', 'perfect'): 5,
('great', 'serious'): 5,
('long', 'other'): 5,
('beautiful', 'other'): 5,
('best', 'important'): 5,
('amazing', 'best'): 5,
('long', 'same'): 5,
('bold', 'great'): 5,
('beautiful', 'same'): 5,
('great', 'technical'): 5,
('charming', 'good'): 5,
('easy', 'little'): 5,
('low', 'more'): 5,
('innocent', 'more'): 5,
('high', 'little'): 5,
('charismatic', 'own'): 5,
('good', 'romantic'): 5,
('innocent', 'such'): 5,
('greatest', 'such'): 5,
('dead', 'good'): 5,
('little', 'smart'): 5,
('better', 'funny'): 5,
('bad', 'same'): 5,
('fresh', 'good'): 5,
('great', 'high'): 5,
('many', 'social'): 5,
('few', 'high'): 5,
('other', 'silly'): 5,
('first', 'nice'): 5,
('many', 'surprised'): 5,
('bad', 'old'): 5,
('absolute', 'other'): 5,
('good', 'worse'): 5,
('bad', 'fantastic'): 5,
('first', 'low'): 5,
('old', 'right'): 5,
('little', 'right'): 5,
('funny', 'old'): 5,
('big', 'old'): 5,
('brilliant', 'first'): 5,
('last', 'original'): 5,
('other', 'pure'): 5,
('great', 'top'): 5,
('sensitive', 'southern'): 5,
('social', 'southern'): 5,
('civil', 'southern'): 5,
('southern', 'subtle'): 5,
('realistic', 'southern'): 5,
('inside', 'southern'): 5,
('non', 'southern'): 5,
('-', 'southern'): 5,
('slave', 'southern'): 5,
('common', 'southern'): 5,
('dull', 'southern'): 5,
('historical', 'southern'): 5,
('simple', 'southern'): 5,
('confederate', 'southern'): 5,
('brilliant', 'southern'): 5,
('important', 'southern'): 5,
('southern', 'third'): 5,
('liberal', 'southern'): 5,
('skilled', 'southern'): 5,
('modest', 'southern'): 5,
('close', 'southern'): 5,
('southern', 'tough'): 5,
('drunk', 'southern'): 5,
('negligent', 'southern'): 5,
('inseparable', 'southern'): 5,
('best', 'southern'): 5,
('different', 'southern'): 5,
('long', 'southern'): 5,
('intense', 'southern'): 5,
('passionate', 'southern'): 5,
('only', 'southern'): 5,
('pure', 'southern'): 5,
('same', 'southern'): 5,
('old', 'southern'): 5,
('powerful', 'southern'): 5,
('breathtaking', 'southern'): 5,
('beautiful', 'southern'): 5,
('Appalachian', 'southern'): 5,
('broad', 'southern'): 5,
('educational', 'southern'): 5,
('middle', 'southern'): 5,
('lower', 'southern'): 5,
('short', 'southern'): 5,
('American', 'southern'): 5,
('few', 'southern'): 5,
('gifted', 'southern'): 5,
('southern', 'vocal'): 5,
('outstanding', 'southern'): 5,
('most', 'third'): 5,
('most', 'outstanding'): 5,
('long', 'short'): 5,
('best', 'original'): 5,
('best', 'funny'): 5,
('enough', 'same'): 5,
('good', 'whole'): 5,
('crazy', 'good'): 5,
('clean', 'old'): 5,
('cinematic', 'old'): 5,
('fresh', 'old'): 5,
('dark', 'old'): 5,
('personal', 'young'): 5,
('comic', 'young'): 5,
('best', 'cinematic'): 5,
('best', 'fresh'): 5,
('better', 'worth'): 5,
('bad', 'worth'): 5,
('bad', 'sure'): 5,
('hard', 'little'): 5,
('good', 'sweet'): 5,
('good', 'preachy'): 5,
('nice', 'such'): 5,
('amazing', 'good'): 5,
('best', 'such'): 5,
('old', 'perfect'): 5,
('old', 'such'): 5,
('best', 'first'): 5,
('first', 'worth'): 5,
('direct', 'silent'): 5,
('1st', 'silent'): 5,
('early', 'silent'): 5,
('little', 'silent'): 5,
('silent', 'top'): 5,
('familiar', 'silent'): 5,
('Later', 'silent'): 5,
('silent', 'similar'): 5,
('glorious', 'silent'): 5,
('pristine', 'silent'): 5,
('silent', 'uphill'): 5,
('positive', 'silent'): 5,
('double', 'silent'): 5,
('silent', 'technical'): 5,
('most', 'silent'): 5,
('fortunate', 'silent'): 5,
('instrumental', 'silent'): 5,
('best', 'silent'): 5,
('difficult', 'silent'): 5,
('entire', 'silent'): 5,
('silent', 'wise'): 5,
('selective', 'silent'): 5,
('next', 'silent'): 5,
('silent', 'subject'): 5,
('right', 'silent'): 5,
('popular', 'silent'): 5,
('Punctured', 'silent'): 5,
('concrete', 'silent'): 5,
('long', 'silent'): 5,
('silent', 'silver'): 5,
('silent', 'subdued'): 5,
('early', 'short'): 5,
('female', 'little'): 5,
('best', 'low'): 5,
('much', 'younger'): 4,
('better', 'last'): 4,
('older', 'other'): 4,
('favourite', 'good'): 4,
('better', 'classic'): 4,
('entertaining', 'more'): 4,
('European', 'more'): 4,
('general', 'more'): 4,
('entertaining', 'new'): 4,
('best', 'entertaining'): 4,
('many', 'rich'): 4,
('rich', 'same'): 4,
('-', 'most'): 4,
('-', 'new'): 4,
('modern', 'own'): 4,
('fantastic', 'little'): 4,
('impossible', 'little'): 4,
('fantastic', 'least'): 4,
('least', 'new'): 4,
('least', 'real'): 4,
('new', 'religious'): 4,
('best', 'religious'): 4,
('human', 'same'): 4,
('fantastic', 'much'): 4,
('dead', 'much'): 4,
('most', 'much'): 4,
('best', 'much'): 4,
('fantastic', 'many'): 4,
('fantastic', 'new'): 4,
('best', 'fantastic'): 4,
('contemporary', 'same'): 4,
('European', 'best'): 4,
('many', 'predictable'): 4,
('many', 'possible'): 4,
('many', 'real'): 4,
('close', 'most'): 4,
('close', 'new'): 4,
('own', 'possible'): 4,
('new', 'possible'): 4,
('best', 'possible'): 4,
('best', 'dead'): 4,
('dead', 'real'): 4,
('initial', 'same'): 4,
('same', 'sexual'): 4,
('less', 'same'): 4,
('most', 'new'): 4,
('impossible', 'most'): 4,
('most', 'real'): 4,
('huge', 'own'): 4,
('own', 'real'): 4,
('own', 'single'): 4,
('huge', 'new'): 4,
('general', 'huge'): 4,
('new', 'ready'): 4,
('best', 'initial'): 4,
('best', 'sexual'): 4,
('best', 'less'): 4,
('real', 'sexual'): 4,
('American', 'few'): 4,
('American', 'own'): 4,
('general', 'great'): 4,
('few', 'general'): 4,
('first', 'general'): 4,
('few', 'own'): 4,
('few', 'young'): 4,
('handsome', 'own'): 4,
('other', 'several'): 4,
('major', 'other'): 4,
('good', 'handsome'): 4,
('brilliant', 'own'): 4,
('bad', 'best'): 4,
('low', 'young'): 4,
('surprised', 'young'): 4,
('brilliant', 'great'): 4,
('good', 'strong'): 4,
('happy', 'many'): 4,
('linear', 'little'): 4,
('confusing', 'little'): 4,
('enough', 'little'): 4,
('little', 'loud'): 4,
('many', 'slow'): 4,
('linear', 'many'): 4,
('confusing', 'many'): 4,
('clear', 'many'): 4,
('interested', 'many'): 4,
('major', 'many'): 4,
('few', 'more'): 4,
('few', 'sure'): 4,
('-', 'non'): 4,
('non', 'other'): 4,
('-', 'other'): 4,
('confusing', 'linear'): 4,
('clear', 'linear'): 4,
('linear', 'sure'): 4,
('European', 'sure'): 4,
('more', 'true'): 4,
('clear', 'confusing'): 4,
('confusing', 'sure'): 4,
('clear', 'sure'): 4,
('much', 'sure'): 4,
('more', 'sure'): 4,
('sure', 'sweet'): 4,
('enough', 'other'): 4,
('other', 'sensitive'): 4,
('able', 'little'): 4,
('big', 'many'): 4,
('gifted', 'many'): 4,
('local', 'many'): 4,
('big', 'other'): 4,
('able', 'big'): 4,
('big', 'important'): 4,
('big', 'classic'): 4,
('big', 'main'): 4,
('big', 'much'): 4,
('able', 'other'): 4,
('able', 'own'): 4,
('able', 'classic'): 4,
('able', 'much'): 4,
('gifted', 'most'): 4,
('great', 'important'): 4,
('brilliant', 'few'): 4,
('brilliant', 'most'): 4,
('classic', 'great'): 4,
('great', 'much'): 4,
('few', 'local'): 4,
('good', 'local'): 4,
('sensual', 'younger'): 4,
('dark', 'good'): 4,
('female', 'good'): 4,
('good', 'sexual'): 4,
('enough', 'good'): 4,
('first', 'sexual'): 4,
('more', 'young'): 4,
('American', 'same'): 4,
('funny', 'sad'): 4,
('funny', 'many'): 4,
('obvious', 'same'): 4,
('comedic', 'great'): 4,
('many', 'mighty'): 4,
('new', 'numerous'): 4,
('Estonian', 'corrupt'): 4,
('different', 'many'): 4,
('different', 'new'): 4,
('beautiful', 'excellent'): 4,
('excellent', 'great'): 4,
('great', 'strange'): 4,
('beautiful', 'own'): 4,
('least', 'other'): 4,
('better', 'real'): 4,
('better', 'subtle'): 4,
('other', 'simple'): 4,
('difficult', 'other'): 4,
('hard', 'other'): 4,
('necessary', 'other'): 4,
('many', 'wrong'): 4,
('first', 'half'): 4,
('entire', 'simple'): 4,
('simple', 'strong'): 4,
('respectful', 'simple'): 4,
('difficult', 'first'): 4,
('difficult', 'good'): 4,
('entire', 'respectful'): 4,
('-', 'good'): 4,
('good', 'hard'): 4,
('good', 'responsible'): 4,
('interesting', 'other'): 4,
('great', 'impressive'): 4,
('best', 'only'): 4,
('close', 'young'): 4,
('best', 'serious'): 4,
('last', 'more'): 4,
('long', 'strong'): 4,
('same', 'strong'): 4,
('amazing', 'strong'): 4,
('best', 'wonderful'): 4,
('best', 'technical'): 4,
('Young', 'great'): 4,
('beautiful', 'long'): 4,
('dark', 'great'): 4,
('beautiful', 'new'): 4,
('amazing', 'new'): 4,
('charming', 'low'): 4,
('favorite', 'good'): 4,
('high', 'most'): 4,
('funny', 'low'): 4,
('endless', 'own'): 4,
('amusing', 'own'): 4,
('own', 'private'): 4,
('greatest', 'own'): 4,
('clever', 'own'): 4,
('better', 'own'): 4,
('endless', 'good'): 4,
('good', 'private'): 4,
('good', 'proper'): 4,
('good', 'greatest'): 4,
('good', 'smart'): 4,
('greatest', 'little'): 4,
('wonderful', 'younger'): 4,
('fresh', 'little'): 4,
('little', 'nice'): 4,
('little', 'wonderful'): 4,
('big', 'same'): 4,
('funny', 'nice'): 4,
('big', 'funny'): 4,
('good', 'short'): 4,
('great', 'surf'): 4,
('great', 'least'): 4,
('low', 'real'): 4,
('little', 'traumatic'): 4,
('big', 'entire'): 4,
('good', 'traumatic'): 4,
('black', 'good'): 4,
('good', 'white'): 4,
('good', 'silly'): 4,
('beautiful', 'special'): 4,
('nice', 'true'): 4,
('first', 'true'): 4,
('last', 'nice'): 4,
('first', 'last'): 4,
('good', 'original'): 4,
('better', 'original'): 4,
('better', 'nice'): 4,
('good', 'surprised'): 4,
('glad', 'good'): 4,
('good', 'hot'): 4,
('great', 'nice'): 4,
('own', 'true'): 4,
('old', 'true'): 4,
('absolute', 'same'): 4,
('same', 'silly'): 4,
('interesting', 'nice'): 4,
('first', 'much'): 4,
('great', 'right'): 4,
('own', 'right'): 4,
('first', 'funny'): 4,
('final', 'true'): 4,
('full', 'own'): 4,
('final', 'own'): 4,
('big', 'wise'): 4,
('much', 'original'): 4,
('first', 'original'): 4,
('first', 'various'): 4,
('first', 'high'): 4,
('fantastic', 'first'): 4,
('original', 'various'): 4,
('full', 'original'): 4,
('more', 'original'): 4,
('final', 'same'): 4,
('pure', 'same'): 4,
('other', 'slapstick'): 4,
('apparent', 'other'): 4,
('brilliant', 'various'): 4,
('fantastic', 'various'): 4,
('full', 'various'): 4,
('brilliant', 'fantastic'): 4,
('brilliant', 'full'): 4,
('fantastic', 'full'): 4,
('few', 'full'): 4,
('fresh', 'most'): 4,
('most', 'top'): 4,
('inventive', 'most'): 4,
('little', 'top'): 4,
('inventive', 'little'): 4,
('great', 'special'): 4,
('intelligent', 'other'): 4,
('intelligent', 'young'): 4,
('intelligent', 'strong'): 4,
('many', 'realistic'): 4,
('most', 'realistic'): 4,
('close', 'other'): 4,
('other', 'powerful'): 4,
('beautiful', 'young'): 4,
('strong', 'young'): 4,
('best', 'short'): 4,
('old', 'short'): 4,
('old', 'outstanding'): 4,
('powerful', 'strong'): 4,
('original', 'perfect'): 4,
('funny', 'original'): 4,
('great', 'whole'): 4,
('enough', 'real'): 4,
('few', 'real'): 4,
('new', 'only'): 4,
('best', 'early'): 4,
('old', 'pleased'): 4,
('pleased', 'young'): 4,
('best', 'pleased'): 4,
('greatest', 'young'): 4,
('old', 'personal'): 4,
('conflicting', 'old'): 4,
('further', 'old'): 4,
('old', 'potential'): 4,
('evil', 'old'): 4,
('dirty', 'old'): 4,
('old', 'visual'): 4,
('lasting', 'old'): 4,
('comic', 'old'): 4,
('conflicting', 'young'): 4,
('further', 'young'): 4,
('potential', 'young'): 4,
('evil', 'young'): 4,
('clean', 'young'): 4,
('visual', 'young'): 4,
('cinematic', 'young'): 4,
('fresh', 'young'): 4,
('lasting', 'young'): 4,
('dark', 'young'): 4,
('best', 'personal'): 4,
('best', 'conflicting'): 4,
('best', 'further'): 4,
('best', 'potential'): 4,
('best', 'evil'): 4,
('best', 'visual'): 4,
('best', 'lasting'): 4,
('bad', 'dark'): 4,
('Beguiled', 'bad'): 4,
('bad', 'sweet'): 4,
('bad', 'high'): 4,
('most', 'right'): 4,
('Beguiled', 'most'): 4,
('early', 'most'): 4,
('most', 'sweet'): 4,
('Beguiled', 'sweet'): 4,
('big', 'early'): 4,
('cheap', 'long'): 4,
('big', 'long'): 4,
('available', 'more'): 4,
('nice', 'sweet'): 4,
('good', 'subliminal'): 4,
('good', 'thin'): 4,
('good', 'scary'): 4,
('amusing', 'nice'): 4,
('nice', 'perfect'): 4,
('catchy', 'good'): 4,
('engaging', 'good'): 4,
('good', 'hilarious'): 4,
('emphatic', 'good'): 4,
('good', 'intelligent'): 4,
('good', 'scared'): 4,
('good', 'worthy'): 4,
('good', 'solid'): 4,
('good', 'superb'): 4,
('good', 'unrecognisable'): 4,
('perfect', 'such'): 4,
('many', 'several'): 4,
('High', 'many'): 4,
('amazing', 'little'): 4,
('good', 'serious'): 4,
('about', 'more'): 4,
('true', 'young'): 4,
('same', 'wild'): 4,
('long', 'such'): 4,
('old', 'rare'): 4,
('perfect', 'rare'): 4,
('great', 'rare'): 4,
('funny', 'rare'): 4,
('available', 'funny'): 4,
('funny', 'hysterical'): 4,
('next', 'such'): 4,
('new', 'next'): 4,
('difficult', 'short'): 4,
('right', 'short'): 4,
('/>My', 'good'): 4,
('good', 'impossible'): 4,
('last', 'new'): 4,
('older', 'true'): 4,
('Tomanian', 'heroic'): 4,
('Jewish', 'Tomanian'): 4,
('more', 'ridiculous'): 4,
('Jewish', 'heroic'): 4,
('entertaining', 'first'): 4,
('entertaining', 'worth'): 4,
('good', 'worth'): 4,
('real', 'right'): 4,
('old', 'real'): 4,
('big', 'special'): 4,
('early', 'such'): 4,
('great', 'sound'): 4,
('sound', 'such'): 4,
('greatest', 'sound'): 4,
('great', 'greatest'): 4,
('real', 'similar'): 4,
('next', 'real'): 4,
('funny', 'modern'): 4,
('funny', 'general'): 4,
('near', 'realistic'): 4,
('amazing', 'perfect'): 4,
('easy', 'perfect'): 4,
('perfect', 'realistic'): 4,
('amazing', 'easy'): 4,
('easy', 'realistic'): 4,
('comic', 'short'): 4,
('married', 'short'): 4,
('hot', 'short'): 4,
('comic', 'married'): 4,
('comic', 'hot'): 4,
('comic', 'first'): 4,
('hot', 'married'): 4,
('desperate', 'realistic'): 4,
('dated', 'realistic'): 4,
('desperate', 'other'): 4,
('desperate', 'strong'): 4,
('dated', 'desperate'): 4,
('new', 'typical'): 4,
('cool', 'other'): 4,
('dated', 'other'): 4,
('dated', 'strong'): 4,
('amazing', 'convincing'): 4,
('amazing', 'bad'): 4,
('convincing', 'other'): 4,
('bad', 'convincing'): 4,
('other', 'sexual'): 4,
('Christian', 'surprised'): 4,
('serious', 'surprised'): 4,
...})
In [6]:
# ネガティブレビューの共起ペア生成
cooccurrence_negative = generate_cooccurrence(review_negative['tokens'])
cooccurrence_negative
Out[6]:
Counter({('bad', 'good'): 51,
('bad', 'terrible'): 27,
('good', 'red'): 24,
('good', 'terrible'): 23,
('better', 'good'): 23,
('good', 'many'): 23,
('good', 'other'): 22,
('main', 'red'): 21,
('bad', 'same'): 20,
('bad', 'first'): 20,
('good', 'little'): 19,
('good', 'such'): 19,
('bad', 'better'): 18,
('funny', 'many'): 18,
('first', 'good'): 17,
('bad', 'many'): 17,
('bad', 'last'): 16,
('bad', 'other'): 16,
('good', 'great'): 16,
('good', 'stupid'): 16,
('evil', 'such'): 16,
('good', 'more'): 16,
('good', 'real'): 16,
('better', 'many'): 16,
('genetic', 'psychical'): 16,
('bad', 'little'): 15,
('bad', 'particular'): 15,
('awful', 'red'): 15,
('modern', 'red'): 15,
('bad', 'whole'): 14,
('bad', 'worst'): 14,
('good', 'same'): 14,
('-', 'real'): 14,
('good', 'least'): 14,
('awful', 'bad'): 14,
('bad', 'stupid'): 14,
('bad', 'new'): 14,
('bad', 'funny'): 14,
('good', 'main'): 14,
('big', 'red'): 14,
('evil', 'red'): 14,
('more', 'same'): 14,
('first', 'more'): 14,
('black', 'red'): 14,
('red', 'such'): 14,
('emotional', 'red'): 14,
('baroque', 'red'): 14,
('famous', 'red'): 14,
('good', 'young'): 14,
('terrible', 'worst'): 13,
('bad', 'interesting'): 13,
('good', 'last'): 13,
('real', 'true'): 13,
('big', 'good'): 13,
('better', 'funny'): 13,
('many', 'more'): 13,
('good', 'interesting'): 12,
('better', 'other'): 12,
('first', 'great'): 12,
('funny', 'good'): 12,
('good', 'much'): 12,
('awful', 'good'): 12,
('first', 'same'): 12,
('bad', 'horrible'): 12,
('good', 'horrible'): 12,
('first', 'little'): 12,
('new', 'same'): 12,
('evil', 'great'): 12,
('bad', 'more'): 12,
('evil', 'good'): 12,
('criminal', 'real'): 12,
('least', 'real'): 12,
('Nazi', 'real'): 12,
('first', 'sexy'): 12,
('more', 'sexy'): 12,
('many', 'such'): 12,
('good', 'hard'): 11,
('best', 'good'): 11,
('bad', 'great'): 11,
('first', 'sure'): 11,
('good', 'new'): 11,
('-', 'good'): 11,
('first', 'red'): 11,
('bad', 'best'): 10,
('bad', 'worse'): 10,
('old', 'other'): 10,
('first', 'other'): 10,
('only', 'special'): 10,
('emotional', 'good'): 10,
('first', 'much'): 10,
('good', 'special'): 10,
('bad', 'slow'): 10,
('better', 'great'): 10,
('ridiculous', 'such'): 10,
('main', 'more'): 10,
('main', 'same'): 10,
('bad', 'red'): 10,
('more', 'red'): 10,
('emotional', 'such'): 10,
('red', 'same'): 10,
('better', 'more'): 10,
('hard', 'terrible'): 9,
('better', 'terrible'): 9,
('good', 'worst'): 9,
('first', 'small'): 9,
('good', 'own'): 9,
('-', 'bad'): 9,
('bad', 'such'): 9,
('awful', 'same'): 9,
('other', 'same'): 9,
('good', 'only'): 9,
('bad', 'different'): 9,
('good', 'second'): 9,
('bad', 'only'): 9,
('little', 'only'): 9,
('many', 'real'): 9,
('horrible', 'real'): 9,
('great', 'many'): 9,
('bad', 'small'): 8,
('bad', 'convincing'): 8,
('first', 'interesting'): 8,
('bad', 'serious'): 8,
('bad', 'own'): 8,
('bad', 'sure'): 8,
('high', 'least'): 8,
('least', 'slow'): 8,
('least', 'same'): 8,
('first', 'many'): 8,
('black', 'emotional'): 8,
('same', 'slow'): 8,
('first', 'special'): 8,
('black', 'good'): 8,
('good', 'tough'): 8,
('first', 'last'): 8,
('last', 'little'): 8,
('big', 'much'): 8,
('much', 'red'): 8,
('last', 'main'): 8,
('last', 'red'): 8,
('big', 'main'): 8,
('fantastic', 'such'): 8,
('red', 'ridiculous'): 8,
('own', 'such'): 8,
('own', 'red'): 8,
('interesting', 'red'): 8,
('great', 'other'): 8,
('new', 'red'): 8,
('original', 'red'): 8,
('-', 'red'): 8,
('good', 'most'): 8,
('most', 'real'): 8,
('horrible', 'other'): 8,
('absurd', 'many'): 8,
('sexy', 'small'): 8,
('sexy', 'silly'): 8,
('Sunny', 'sexy'): 8,
('sexy', 'wounded'): 8,
('medical', 'sexy'): 8,
('azure', 'beautiful'): 8,
('beautiful', 'female'): 8,
('bad', 'hot'): 8,
('bad', 'clear'): 8,
('least', 'terrible'): 8,
('bad', 'insane'): 8,
('Spanish', 'main'): 8,
('main', 'pretty'): 8,
('genetic', 'whole'): 8,
('psychical', 'whole'): 8,
('genetic', 'psychic'): 8,
('psychic', 'psychical'): 8,
('bad', 'genetic'): 8,
('bad', 'psychical'): 8,
('terrible', 'whole'): 7,
('bad', 'hard'): 7,
('better', 'hard'): 7,
('better', 'interesting'): 7,
('last', 'same'): 7,
('OK', 'black'): 7,
('great', 'interesting'): 7,
('first', 'own'): 7,
('good', 'short'): 7,
('little', 'much'): 7,
('bad', 'familiar'): 7,
('Bad', 'bad'): 7,
('bad', 'black'): 7,
('lazy', 'real'): 7,
('first', 'high'): 7,
('awful', 'other'): 7,
('awful', 'first'): 7,
('awful', 'black'): 7,
('funny', 'other'): 7,
('many', 'same'): 7,
('first', 'only'): 7,
('old', 'same'): 7,
('first', 'slow'): 7,
('boring', 'first'): 7,
('first', 'silly'): 7,
('poor', 'same'): 7,
('awful', 'such'): 7,
('little', 'stupid'): 7,
('dimensional', 'evil'): 7,
('evil', 'much'): 7,
('evil', 'first'): 7,
('evil', 'horrible'): 7,
('20th', 'evil'): 7,
('evil', 'sympathetic'): 7,
('boring', 'great'): 7,
('glossy', 'red'): 7,
('more', 'much'): 7,
('impuissant', 'red'): 7,
('interested', 'main'): 7,
('interested', 'red'): 7,
('bizarre', 'red'): 7,
('red', 'very'): 7,
('fortunate', 'red'): 7,
('red', 'tenuous'): 7,
('red', 'tremendous'): 7,
('latter', 'red'): 7,
('general', 'main'): 7,
('general', 'red'): 7,
('average', 'red'): 7,
('quotidian', 'red'): 7,
('impressive', 'red'): 7,
('grotesque', 'red'): 7,
('fantastic', 'red'): 7,
('good', 'ridiculous'): 7,
('evil', 'main'): 7,
('milquetoast', 'red'): 7,
('final', 'red'): 7,
('corporate', 'red'): 7,
('red', 'spiritual'): 7,
('red', 'religious'): 7,
('altruistic', 'red'): 7,
('red', 'sympathetic'): 7,
('blunt', 'red'): 7,
('dimensional', 'red'): 7,
('modern', 'more'): 7,
('more', 'other'): 7,
('good', 'modern'): 7,
('elderly', 'red'): 7,
('confused', 'red'): 7,
('definite', 'red'): 7,
('indispensable', 'red'): 7,
('modern', 'same'): 7,
('funerary', 'red'): 7,
('red', 'suicidal'): 7,
('red', 'simple'): 7,
('red', 'second'): 7,
('other', 'red'): 7,
('appropriate', 'red'): 7,
('red', 'sacred'): 7,
('extreme', 'red'): 7,
('inherent', 'red'): 7,
('bold', 'red'): 7,
('receptive', 'red'): 7,
('indulgent', 'red'): 7,
('red', 'risky'): 7,
('red', 'vulnerable'): 7,
('enormous', 'red'): 7,
('earliest', 'red'): 7,
('dead', 'red'): 7,
('Hebrew', 'red'): 7,
('anachronistic', 'red'): 7,
('accepted', 'red'): 7,
('mysterious', 'red'): 7,
('frieze', 'red'): 7,
('complete', 'red'): 7,
('red', 'resplendent'): 7,
('formal', 'red'): 7,
('musical', 'red'): 7,
('quasi', 'red'): 7,
('great', 'red'): 7,
('red', 'restive'): 7,
('absolute', 'red'): 7,
('best', 'red'): 7,
('masterful', 'red'): 7,
('dramatic', 'red'): 7,
('poignant', 'red'): 7,
('alive', 'red'): 7,
('minor', 'red'): 7,
('first', 'new'): 7,
('brutal', 'real'): 7,
('big', 'least'): 7,
('horrible', 'many'): 7,
('funny', 'more'): 7,
('big', 'little'): 7,
('bad', 'pathetic'): 7,
('bad', 'young'): 7,
('bad', 'major'): 7,
('bad', 'perfect'): 6,
('terrible', 'top'): 6,
('good', 'top'): 6,
('good', 'whole'): 6,
('better', 'whole'): 6,
('good', 'small'): 6,
('OK', 'good'): 6,
('interesting', 'same'): 6,
('bad', 'decent'): 6,
('generous', 'plastic'): 6,
('funny', 'stupid'): 6,
('funny', 'plastic'): 6,
('convincing', 'good'): 6,
('funny', 'great'): 6,
('interesting', 'own'): 6,
('Original', 'bad'): 6,
('little', 'many'): 6,
('bad', 'much'): 6,
('bad', 'predictable'): 6,
('black', 'first'): 6,
('better', 'first'): 6,
('-', 'true'): 6,
('black', 'fair'): 6,
('bad', 'original'): 6,
('high', 'slow'): 6,
('OK', 'high'): 6,
('good', 'high'): 6,
('awful', 'least'): 6,
('least', 'only'): 6,
('OK', 'least'): 6,
('least', 'special'): 6,
('least', 'much'): 6,
('other', 'whole'): 6,
('many', 'whole'): 6,
('awful', 'emotional'): 6,
('awful', 'slow'): 6,
('only', 'other'): 6,
('OK', 'emotional'): 6,
('OK', 'slow'): 6,
('slow', 'special'): 6,
('good', 'slow'): 6,
('OK', 'special'): 6,
('OK', 'fancy'): 6,
('much', 'special'): 6,
('good', 'single'): 6,
('average', 'good'): 6,
('much', 'same'): 6,
('black', 'same'): 6,
('first', 'such'): 6,
('awful', 'terrible'): 6,
('bad', 'female'): 6,
('few', 'good'): 6,
('second', 'stupid'): 6,
('bad', 'second'): 6,
('first', 'second'): 6,
('little', 'second'): 6,
('bad', 'single'): 6,
('bad', 'special'): 6,
('first', 'tough'): 6,
('first', 'horrible'): 6,
('little', 'special'): 6,
('many', 'new'): 6,
('infamous', 'same'): 6,
('great', 'much'): 6,
('first', 'ridiculous'): 6,
('bad', 'entire'): 6,
('new', 'own'): 6,
('great', 'new'): 6,
('much', 'such'): 6,
('awful', 'main'): 6,
('big', 'such'): 6,
('bad', 'latter'): 6,
('black', 'main'): 6,
('main', 'modern'): 6,
('main', 'such'): 6,
('emotional', 'main'): 6,
('baroque', 'main'): 6,
('famous', 'main'): 6,
('emotional', 'evil'): 6,
('dead', 'good'): 6,
('baroque', 'good'): 6,
('famous', 'good'): 6,
('new', 'other'): 6,
('personal', 'real'): 6,
('enjoyment', 'real'): 6,
('good', 'successful'): 6,
('awful', 'real'): 6,
('miserable', 'real'): 6,
('least', 'true'): 6,
('real', 'short'): 6,
('real', 'successful'): 6,
('alcoholic', 'real'): 6,
('nice', 'real'): 6,
('high', 'real'): 6,
('/>These', 'real'): 6,
('beautiful', 'real'): 6,
('indulgent', 'real'): 6,
('narcissistic', 'real'): 6,
('big', 'real'): 6,
('beefy', 'real'): 6,
('real', 'skinny'): 6,
('early', 'real'): 6,
('Many', 'real'): 6,
('real', 'vulgar'): 6,
('larger', 'real'): 6,
('dumb', 'real'): 6,
('real', 'special'): 6,
('real', 'stupid'): 6,
('dead', 'real'): 6,
('lousy', 'real'): 6,
('-', 'least'): 6,
('beautiful', 'many'): 6,
('most', 'same'): 6,
('absurd', 'funny'): 6,
('absurd', 'better'): 6,
('special', 'such'): 6,
('bad', 'most'): 6,
('least', 'worst'): 6,
('more', 'small'): 6,
('Sunny', 'first'): 6,
('first', 'wounded'): 6,
('first', 'medical'): 6,
('more', 'silly'): 6,
('obvious', 'sexy'): 6,
('Sunny', 'more'): 6,
('bad', 'sad'): 6,
('more', 'wounded'): 6,
('more', 'sure'): 6,
('little', 'more'): 6,
('little', 'same'): 6,
('medical', 'more'): 6,
('good', 'wrong'): 6,
('hard', 'other'): 6,
('last', 'many'): 6,
('successful', 'young'): 6,
('many', 'young'): 6,
('boring', 'violent'): 6,
('clear', 'violent'): 6,
('bad', 'blue'): 6,
('friendly', 'violent'): 6,
('main', 'many'): 6,
('double', 'good'): 6,
('bad', 'surprised'): 6,
('evil', 'many'): 6,
('professional', 'terrible'): 6,
('such', 'wicked'): 6,
('equivalent', 'such'): 6,
('20th', 'such'): 6,
('such', 'sweet'): 6,
('human', 'such'): 6,
('pure', 'such'): 6,
('daily', 'such'): 6,
('constant', 'such'): 6,
('cruel', 'such'): 6,
('monstrous', 'such'): 6,
('insane', 'such'): 6,
('preposterous', 'such'): 6,
('ill', 'such'): 6,
('lost', 'such'): 6,
('Sane', 'such'): 6,
('addicted', 'such'): 6,
('real', 'such'): 6,
('horrible', 'such'): 6,
('legal', 'such'): 6,
('such', 'vast'): 6,
('deceitful', 'such'): 6,
('selfish', 'such'): 6,
('less', 'such'): 6,
('civil', 'such'): 6,
('rational', 'such'): 6,
('concerned', 'such'): 6,
('such', 'vile'): 6,
('intellectual', 'such'): 6,
('brutal', 'such'): 6,
('perfect', 'terrible'): 5,
('small', 'terrible'): 5,
('same', 'terrible'): 5,
('hard', 'worst'): 5,
('interesting', 'whole'): 5,
('better', 'worst'): 5,
('good', 'less'): 5,
('best', 'better'): 5,
('better', 'same'): 5,
('black', 'funny'): 5,
('OK', 'funny'): 5,
('funny', 'old'): 5,
('good', 'proper'): 5,
('good', 'worth'): 5,
('first', 'funny'): 5,
('complete', 'good'): 5,
('good', 'particular'): 5,
('bad', 'lousy'): 5,
('bad', 'cheesy'): 5,
('bad', 'super'): 5,
('bad', 'real'): 5,
('interesting', 'sure'): 5,
('bad', 'old'): 5,
('gay', 'old'): 5,
('short', 'special'): 5,
('high', 'old'): 5,
('high', 'special'): 5,
('high', 'same'): 5,
('special', 'top'): 5,
('awful', 'whole'): 5,
('predictable', 'whole'): 5,
('first', 'whole'): 5,
('many', 'other'): 5,
('black', 'other'): 5,
('many', 'subtle'): 5,
('boring', 'many'): 5,
('much', 'only'): 5,
('first', 'old'): 5,
('black', 'slow'): 5,
('first', 'poor'): 5,
('good', 'odd'): 5,
('decent', 'much'): 5,
('black', 'special'): 5,
('awful', 'nice'): 5,
('bad', 'embarrassing'): 5,
('bad', 'easy'): 5,
('impossible', 'terrible'): 5,
('basic', 'same'): 5,
('different', 'same'): 5,
('few', 'predictable'): 5,
('last', 'second'): 5,
('special', 'stupid'): 5,
('horrible', 'stupid'): 5,
('bad', 'tough'): 5,
('good', 'obvious'): 5,
('little', 'tough'): 5,
('ridiculous', 'same'): 5,
('same', 'unlikely'): 5,
('evil', 'simplistic'): 5,
('deep', 'evil'): 5,
('boring', 'evil'): 5,
('confusing', 'evil'): 5,
('evil', 'inept'): 5,
('evil', 'phsycotic'): 5,
('great', 'little'): 5,
('great', 'main'): 5,
('interesting', 'new'): 5,
('long', 'more'): 5,
('main', 'much'): 5,
('big', 'last'): 5,
('last', 'new'): 5,
('awful', 'big'): 5,
('bad', 'big'): 5,
('big', 'more'): 5,
('big', 'interesting'): 5,
('big', 'other'): 5,
('big', 'first'): 5,
('fantastic', 'good'): 5,
('main', 'other'): 5,
('interesting', 'more'): 5,
('great', 'more'): 5,
('good', 'indulgent'): 5,
('dramatic', 'good'): 5,
('black', 'such'): 5,
('black', 'great'): 5,
('first', 'modern'): 5,
('original', 'same'): 5,
('good', 'true'): 5,
('beautiful', 'nice'): 5,
('least', 'most'): 5,
('Many', 'many'): 5,
('horrible', 'more'): 5,
('best', 'many'): 5,
('many', 'negative'): 5,
('many', 'obnoxious'): 5,
('great', 'old'): 5,
('only', 'such'): 5,
('big', 'only'): 5,
('first', 'most'): 5,
('dead', 'young'): 5,
('first', 'particular'): 5,
('more', 'tough'): 5,
('long', 'sexy'): 5,
('good', 'impossible'): 5,
('first', 'typical'): 5,
('bad', 'boring'): 5,
('better', 'boring'): 5,
('better', 'few'): 5,
('least', 'little'): 5,
('adequate', 'bad'): 5,
('bad', 'uncomprehended'): 5,
('bad', 'pastel'): 5,
('bad', 'coloured'): 5,
('bad', 'physical'): 5,
('bad', 'psychological'): 5,
('bad', 'tacky'): 5,
('bad', 'hilarious'): 5,
('bad', 'bankrupt'): 5,
('bad', 'top'): 4,
('OK', 'top'): 4,
('best', 'terrible'): 4,
('less', 'terrible'): 4,
('interesting', 'terrible'): 4,
('hard', 'same'): 4,
('bad', 'racist'): 4,
('bad', 'less'): 4,
('OK', 'bad'): 4,
('OK', 'whole'): 4,
('same', 'whole'): 4,
('same', 'worst'): 4,
('same', 'small'): 4,
('better', 'last'): 4,
('OK', 'same'): 4,
('irish', 'oirish'): 4,
('funny', 'generous'): 4,
('OK', 'old'): 4,
('good', 'mediocre'): 4,
('bad', 'worth'): 4,
('slow', 'wooden'): 4,
('great', 'own'): 4,
('complete', 'great'): 4,
('complete', 'first'): 4,
('funny', 'own'): 4,
('good', 'next'): 4,
('many', 'particular'): 4,
('many', 'much'): 4,
('familiar', 'many'): 4,
('best', 'full'): 4,
('bad', 'worthy'): 4,
('bad', 'right'): 4,
('bad', 'next'): 4,
('bleak', 'sloppy'): 4,
('-', 'black'): 4,
('better', 'sure'): 4,
('better', 'common'): 4,
('black', 'full'): 4,
('hard', 'old'): 4,
('old', 'such'): 4,
('least', 'short'): 4,
('awful', 'high'): 4,
('emotional', 'high'): 4,
('high', 'silly'): 4,
('fancy', 'high'): 4,
('black', 'high'): 4,
('high', 'poor'): 4,
('least', 'top'): 4,
('only', 'top'): 4,
('least', 'other'): 4,
('least', 'many'): 4,
('least', 'old'): 4,
('emotional', 'least'): 4,
('funny', 'least'): 4,
('first', 'least'): 4,
('fancy', 'least'): 4,
('black', 'least'): 4,
('least', 'poor'): 4,
('funny', 'whole'): 4,
('slow', 'whole'): 4,
('special', 'whole'): 4,
('right', 'slow'): 4,
('right', 'same'): 4,
('emotional', 'grotesque'): 4,
('good', 'grotesque'): 4,
('black', 'grotesque'): 4,
('emotional', 'other'): 4,
('other', 'slow'): 4,
('odd', 'other'): 4,
('much', 'other'): 4,
('emotional', 'many'): 4,
('many', 'special'): 4,
('funny', 'subtle'): 4,
('old', 'slow'): 4,
('good', 'old'): 4,
('predictable', 'slow'): 4,
('good', 'predictable'): 4,
('poor', 'predictable'): 4,
('emotional', 'slow'): 4,
('emotional', 'first'): 4,
('emotional', 'special'): 4,
('emotional', 'interested'): 4,
('average', 'emotional'): 4,
('emotional', 'fancy'): 4,
('emotional', 'much'): 4,
('emotional', 'same'): 4,
('funny', 'slow'): 4,
('boring', 'funny'): 4,
('funny', 'same'): 4,
('boring', 'slow'): 4,
('fancy', 'slow'): 4,
('poor', 'slow'): 4,
('decent', 'good'): 4,
('boring', 'special'): 4,
('single', 'special'): 4,
('fancy', 'special'): 4,
('same', 'special'): 4,
('good', 'interested'): 4,
('black', 'interested'): 4,
('boring', 'much'): 4,
('boring', 'good'): 4,
('black', 'boring'): 4,
('dull', 'good'): 4,
('black', 'single'): 4,
('good', 'silly'): 4,
('cheap', 'good'): 4,
('average', 'black'): 4,
('black', 'fancy'): 4,
('black', 'much'): 4,
('cool', 'good'): 4,
('disappointing', 'same'): 4,
('giant', 'nice'): 4,
('bad', 'nice'): 4,
('nice', 'terrible'): 4,
('nice', 'whole'): 4,
('female', 'nice'): 4,
('awful', 'extreme'): 4,
('other', 'such'): 4,
('other', 'terrible'): 4,
('basic', 'other'): 4,
('other', 'wrong'): 4,
('sure', 'whole'): 4,
('awful', 'final'): 4,
('awful', 'giant'): 4,
('bad', 'giant'): 4,
('giant', 'terrible'): 4,
('giant', 'whole'): 4,
('able', 'bad'): 4,
('bad', 'impossible'): 4,
('bad', 'wrong'): 4,
('female', 'terrible'): 4,
('terrible', 'wrong'): 4,
('female', 'whole'): 4,
('same', 'wrong'): 4,
('great', 'last'): 4,
('first', 'stupid'): 4,
('stupid', 'tough'): 4,
('single', 'stupid'): 4,
('only', 'stupid'): 4,
('stupid', 'uninspired'): 4,
('bright', 'good'): 4,
('bright', 'first'): 4,
('first', 'obvious'): 4,
('last', 'only'): 4,
('best', 'horrible'): 4,
('better', 'big'): 4,
('infamous', 'new'): 4,
('many', 'ridiculous'): 4,
('first', 'willing'): 4,
('entire', 'little'): 4,
('funny', 'horrible'): 4,
('Terrible', 'good'): 4,
('good', 'ill'): 4,
('interesting', 'main'): 4,
('cheap', 'great'): 4,
('average', 'main'): 4,
('better', 'gory'): 4,
('absurd', 'great'): 4,
('absurd', 'good'): 4,
('better', 'bitter'): 4,
('interesting', 'much'): 4,
('last', 'more'): 4,
('famous', 'last'): 4,
('awful', 'evil'): 4,
('awful', 'own'): 4,
('awful', 'interesting'): 4,
('awful', 'modern'): 4,
('awful', 'new'): 4,
('-', 'awful'): 4,
('awful', 'baroque'): 4,
('awful', 'famous'): 4,
('big', 'evil'): 4,
('big', 'black'): 4,
('big', 'modern'): 4,
('big', 'emotional'): 4,
('big', 'same'): 4,
('-', 'big'): 4,
('baroque', 'big'): 4,
('big', 'famous'): 4,
('modern', 'tenuous'): 4,
('good', 'latter'): 4,
('evil', 'fantastic'): 4,
('evil', 'ridiculous'): 4,
('more', 'ridiculous'): 4,
('bad', 'main'): 4,
('first', 'main'): 4,
('evil', 'own'): 4,
('black', 'evil'): 4,
('evil', 'modern'): 4,
('baroque', 'evil'): 4,
('evil', 'famous'): 4,
('more', 'new'): 4,
('anachronistic', 'good'): 4,
('good', 'musical'): 4,
('own', 'same'): 4,
('interesting', 'such'): 4,
('black', 'modern'): 4,
('baroque', 'black'): 4,
('black', 'famous'): 4,
('modern', 'such'): 4,
('emotional', 'modern'): 4,
('baroque', 'modern'): 4,
('famous', 'modern'): 4,
('baroque', 'such'): 4,
('great', 'such'): 4,
('famous', 'such'): 4,
('minor', 'such'): 4,
('complete', 'other'): 4,
('best', 'other'): 4,
('baroque', 'emotional'): 4,
('emotional', 'famous'): 4,
('great', 'same'): 4,
('-', 'dead'): 4,
('first', 'original'): 4,
('famous', 'first'): 4,
('baroque', 'famous'): 4,
('good', 'personal'): 4,
('criminal', 'good'): 4,
('good', 'nice'): 4,
('Nazi', 'good'): 4,
('brutal', 'good'): 4,
('awful', 'stupid'): 4,
('criminal', 'true'): 4,
('big', 'true'): 4,
('Nazi', 'true'): 4,
('criminal', 'least'): 4,
('-', 'criminal'): 4,
('Nazi', 'criminal'): 4,
('least', 'vulgar'): 4,
('Nazi', 'least'): 4,
('big', 'special'): 4,
('horrible', 'most'): 4,
('-', 'Nazi'): 4,
('more', 'poor'): 4,
('modern', 'unfunny'): 4,
('same', 'unfunny'): 4,
('horrible', 'same'): 4,
('great', 'whole'): 4,
('many', 'plain'): 4,
('absurd', 'more'): 4,
('common', 'many'): 4,
('hotter', 'many'): 4,
('absurdist', 'many'): 4,
('fantastical', 'many'): 4,
('many', 'skeptical'): 4,
('critical', 'many'): 4,
('bitter', 'many'): 4,
('many', 'unaired'): 4,
('correct', 'many'): 4,
('many', 'psychotic'): 4,
('better', 'negative'): 4,
('good', 'negative'): 4,
('Other', 'same'): 4,
('repetitive', 'same'): 4,
('first', 'long'): 4,
('popular', 'special'): 4,
('ridiculous', 'special'): 4,
('bad', 'least'): 4,
('bad', 'yellow'): 4,
('bad', 'poor'): 4,
('funny', 'new'): 4,
('funny', 'worst'): 4,
('same', 'yellow'): 4,
('least', 'red'): 4,
('least', 'yellow'): 4,
('last', 'least'): 4,
('slow', 'yellow'): 4,
('poor', 'young'): 4,
('dead', 'poor'): 4,
('first', 'full'): 4,
('possible', 'terrible'): 4,
('sexy', 'tragic'): 4,
('high', 'sexy'): 4,
('silly', 'small'): 4,
('Sunny', 'small'): 4,
('small', 'wounded'): 4,
('medical', 'small'): 4,
('much', 'sexy'): 4,
('much', 'sure'): 4,
('sexy', 'tough'): 4,
('Sunny', 'silly'): 4,
('silly', 'wounded'): 4,
('medical', 'silly'): 4,
('modern', 'sexy'): 4,
('femme', 'sexy'): 4,
('fatal', 'sexy'): 4,
('bad', 'sexy'): 4,
('sexy', 'sure'): 4,
('good', 'sexy'): 4,
('fresh', 'sexy'): 4,
('little', 'sexy'): 4,
('poor', 'sexy'): 4,
('classic', 'sexy'): 4,
('Double', 'sexy'): 4,
('bright', 'sexy'): 4,
('devious', 'sexy'): 4,
('particular', 'sexy'): 4,
('compelling', 'sexy'): 4,
('same', 'sexy'): 4,
('rural', 'sexy'): 4,
('implausible', 'sexy'): 4,
('ridiculous', 'sexy'): 4,
('sad', 'sexy'): 4,
('sexy', 'stereotypical'): 4,
('Hispanic', 'sexy'): 4,
('sexy', 'unintentional'): 4,
('red', 'sexy'): 4,
('Sunny', 'wounded'): 4,
('Sunny', 'medical'): 4,
('medical', 'wounded'): 4,
('good', 'sure'): 4,
('more', 'particular'): 4,
('beautiful', 'much'): 4,
('much', 'nice'): 4,
('Mediterrenean', 'beautiful'): 4,
('beautiful', 'lovely'): 4,
('beautiful', 'romantic'): 4,
('beautiful', 'convincing'): 4,
('beautiful', 'egomaniac'): 4,
('beautiful', 'bent'): 4,
('beautiful', 'vulnerable'): 4,
('beautiful', 'familiar'): 4,
('beautiful', 'macho'): 4,
('beautiful', 'obnoxious'): 4,
('beautiful', 'difficult'): 4,
('beautiful', 'central'): 4,
('beautiful', 'mediocre'): 4,
('azure', 'female'): 4,
('convincing', 'many'): 4,
('little', 'other'): 4,
('little', 'new'): 4,
('little', 'most'): 4,
('catholic', 'other'): 4,
('catholic', 'young'): 4,
('catholic', 'good'): 4,
('catholic', 'tough'): 4,
('other', 'young'): 4,
('other', 'tough'): 4,
('most', 'other'): 4,
('better', 'new'): 4,
('hard', 'young'): 4,
('hard', 'impossible'): 4,
('more', 'young'): 4,
('tough', 'young'): 4,
('same', 'young'): 4,
('broad', 'good'): 4,
('-', 'terrible'): 4,
('funny', 'smart'): 4,
('bad', 'typical'): 4,
('bad', 'full'): 4,
('bad', 'close'): 4,
('bad', 'technical'): 4,
('close', 'good'): 4,
('little', 'typical'): 4,
('many', 'typical'): 4,
('many', 'sure'): 4,
('last', 'typical'): 4,
('clear', 'first'): 4,
('much', 'successful'): 4,
('vampire', 'whole'): 4,
('few', 'other'): 4,
('angry', 'young'): 4,
('unrelenting', 'young'): 4,
('closest', 'young'): 4,
('rundown', 'young'): 4,
('usual', 'young'): 4,
('personal', 'young'): 4,
('entrenched', 'young'): 4,
('older', 'young'): 4,
('experienced', 'young'): 4,
('dramatic', 'young'): 4,
('overall', 'young'): 4,
('complex', 'young'): 4,
('only', 'worst'): 4,
('few', 'funny'): 4,
('boring', 'clear'): 4,
('boring', 'friendly'): 4,
('clear', 'friendly'): 4,
('big', 'few'): 4,
('corrupt', 'good'): 4,
('double', 'many'): 4,
('corrupt', 'double'): 4,
('corrupt', 'many'): 4,
('Other', 'honorable'): 4,
('beggar', 'honorable'): 4,
('honorable', 'hungry'): 4,
('Other', 'beggar'): 4,
('Other', 'hungry'): 4,
('beggar', 'hungry'): 4,
('next', 'same'): 4,
('bad', 'weird'): 4,
('only', 'terrible'): 4,
('professional', 'worst'): 4,
('good', 'professional'): 4,
('least', 'professional'): 4,
('hard', 'professional'): 4,
('constant', 'good'): 4,
('hard', 'least'): 4,
('good', 'pure'): 4,
('basic', 'big'): 4,
('basic', 'little'): 4,
('basic', 'much'): 4,
('basic', 'least'): 4,
('convincing', 'interesting'): 4,
('convincing', 'first'): 4,
('convincing', 'particular'): 4,
('convincing', 'sure'): 4,
('hard', 'main'): 4,
('Spanish', 'pretty'): 4,
('main', 'middle'): 4,
('main', 'popular'): 4,
('certain', 'main'): 4,
('main', 'ready'): 4,
('main', 'mixed'): 4,
('gratuitous', 'main'): 4,
('main', 'vital'): 4,
('impetuous', 'main'): 4,
('boorish', 'main'): 4,
('main', 'military'): 4,
...})
In [7]:
# 上位30件の共起ペアを抽出
top_positive_pairs = cooccurrence_positive.most_common(30)
top_negative_pairs = cooccurrence_negative.most_common(30)
In [ ]:
import csv
from pyvis.network import Network
from collections import defaultdict
def visualize_cooccurrence_with_node_size(top_pairs, tokens_list, title):
"""
共起ペアをネットワークとして可視化し、ノードとエッジをCSVに出力する関数。
ノードのサイズは単語の出現回数に基づいて設定。
"""
net = Network(height="750px", width="100%", notebook=True)
net.force_atlas_2based() # レイアウト設定
# 単語の出現回数を計算
word_counts = defaultdict(int)
for tokens in tokens_list:
for token in tokens:
word_counts[token] += 1
nodes = set() # ノードを管理するセット
edges = [] # エッジを管理するリスト
# ノードとエッジを追加
for (word1, word2), weight in top_pairs:
# ノードを追加(重複を避ける)
if word1 not in nodes:
net.add_node(word1, label=word1, size=word_counts[word1])
nodes.add(word1)
if word2 not in nodes:
net.add_node(word2, label=word2, size=word_counts[word2])
nodes.add(word2)
# エッジを追加
edges.append((word1, word2, weight))
net.add_edge(word1, word2, value=weight)
# ノードをCSVに出力
nodes_csv = f"nodes_{title}.csv"
with open(nodes_csv, mode='w', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
writer.writerow(['word', 'size']) # ヘッダー
for node in nodes:
writer.writerow([node, word_counts[node]]) # ノードを保存
print(f"Nodes saved as {nodes_csv}")
# エッジをCSVに出力
edges_csv = f"edges_{title}.csv"
with open(edges_csv, mode='w', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
writer.writerow(['word1', 'word2', 'weight']) # ヘッダー
writer.writerows(edges) # エッジを保存
print(f"Edges saved as {edges_csv}")
# HTMLファイルとして保存して表示
output_file = f"{title}.html"
net.show(output_file)
print(f"Visualization saved as {output_file}")
In [25]:
# ポジティブレビューの共起ネットワークを可視化
visualize_cooccurrence_with_node_size(
top_positive_pairs,
review_positive['tokens'],
"positive_cooccurrence_adjectives"
)
Warning: When cdn_resources is 'local' jupyter notebook has issues displaying graphics on chrome/safari. Use cdn_resources='in_line' or cdn_resources='remote' if you have issues viewing graphics in a notebook. Nodes saved as nodes_positive_cooccurrence_adjectives.csv Edges saved as edges_positive_cooccurrence_adjectives.csv positive_cooccurrence_adjectives.html Visualization saved as positive_cooccurrence_adjectives.html
In [27]:
# ネガティブレビューの共起ネットワークを可視化
visualize_cooccurrence_with_node_size(
top_negative_pairs,
review_negative['tokens'],
"negative_cooccurrence_adjectives"
)
Warning: When cdn_resources is 'local' jupyter notebook has issues displaying graphics on chrome/safari. Use cdn_resources='in_line' or cdn_resources='remote' if you have issues viewing graphics in a notebook. Nodes saved as nodes_negative_cooccurrence_adjectives.csv Edges saved as edges_negative_cooccurrence_adjectives.csv negative_cooccurrence_adjectives.html Visualization saved as negative_cooccurrence_adjectives.html
In [ ]: