In [1]:
# 必要なライブラリをインポート
import pandas as pd
import re
from itertools import combinations
from collections import Counter
from pyvis.network import Network
import spacy
# spaCyモデルのロード
nlp = spacy.load("en_core_web_sm")
# 動詞だけを抽出する関数
def extract_verbs(text):
doc = nlp(text)
return [token.text for token in doc if token.pos_ == "VERB"]
# 形容詞だけを抽出する関数
def extract_adjectives(text):
doc = nlp(text)
return [token.text for token in doc if token.pos_ == "ADJ"]
# IMDbデータセットを読み込む
data = pd.read_csv("IMDB Dataset.csv")
data.head()
Out[1]:
review | sentiment | |
---|---|---|
0 | One of the other reviewers has mentioned that ... | positive |
1 | A wonderful little production. <br /><br />The... | positive |
2 | I thought this was a wonderful way to spend ti... | positive |
3 | Basically there's a family where a little boy ... | negative |
4 | Petter Mattei's "Love in the Time of Money" is... | positive |
In [2]:
# ポジティブレビューとネガティブレビューのサンプリング
review_positive = data[data['sentiment'] == 'positive'].sample(n=100, random_state=50)
review_negative = data[data['sentiment'] == 'negative'].sample(n=100, random_state=50)
In [3]:
# ポジティブレビューとネガティブレビューの動詞抽出
#review_positive['tokens'] = review_positive['review'].apply(extract_verbs)
#review_negative['tokens'] = review_negative['review'].apply(extract_verbs)
# ポジティブレビューとネガティブレビューの形容詞抽出
review_positive['tokens'] = review_positive['review'].apply(extract_adjectives)
review_negative['tokens'] = review_negative['review'].apply(extract_adjectives)
In [4]:
# 共起ペア生成関数(順序を無視してカウント)
def generate_cooccurrence(tokens_list):
"""
トークンリストから順序を無視した共起ペアを生成し、頻度をカウント
"""
pairs = []
for tokens in tokens_list:
# 自己ペアを排除し、ソートしたペアを生成
pairs.extend(tuple(sorted((a, b))) for a, b in combinations(tokens, 2) if a != b)
return Counter(pairs)
In [5]:
# ポジティブレビューの共起ペア生成
cooccurrence_positive = generate_cooccurrence(review_positive['tokens'])
cooccurrence_positive
Out[5]:
Counter({('good', 'great'): 38, ('good', 'little'): 30, ('good', 'other'): 29, ('little', 'other'): 23, ('great', 'other'): 23, ('best', 'young'): 23, ('good', 'many'): 22, ('best', 'great'): 22, ('best', 'old'): 22, ('little', 'more'): 20, ('best', 'same'): 20, ('old', 'young'): 20, ('best', 'many'): 19, ('other', 'same'): 19, ('good', 'own'): 18, ('first', 'other'): 18, ('many', 'other'): 17, ('many', 'most'): 16, ('few', 'other'): 16, ('many', 'young'): 16, ('big', 'silent'): 16, ('better', 'good'): 15, ('little', 'own'): 15, ('many', 'same'): 15, ('best', 'new'): 15, ('best', 'other'): 15, ('good', 'nice'): 15, ('good', 'more'): 15, ('good', 'low'): 15, ('funny', 'good'): 15, ('many', 'southern'): 15, ('most', 'southern'): 15, ('real', 'silent'): 15, ('short', 'silent'): 15, ('little', 'many'): 14, ('little', 'most'): 14, ('best', 'little'): 14, ('great', 'new'): 14, ('few', 'good'): 14, ('first', 'good'): 14, ('little', 'such'): 14, ('big', 'great'): 14, ('most', 'other'): 14, ('good', 'such'): 14, ('best', 'good'): 14, ('first', 'little'): 14, ('much', 'other'): 13, ('little', 'same'): 13, ('new', 'same'): 13, ('few', 'great'): 13, ('brilliant', 'other'): 13, ('great', 'many'): 13, ('more', 'other'): 13, ('beautiful', 'great'): 13, ('little', 'low'): 13, ('older', 'younger'): 12, ('great', 'old'): 12, ('new', 'other'): 12, ('good', 'only'): 12, ('great', 'little'): 12, ('good', 'most'): 12, ('full', 'other'): 12, ('other', 'strong'): 12, ('great', 'same'): 12, ('great', 'real'): 12, ('good', 'much'): 11, ('little', 'much'): 11, ('last', 'other'): 11, ('many', 'more'): 11, ('-', 'many'): 11, ('fantastic', 'same'): 11, ('many', 'own'): 11, ('own', 'same'): 11, ('great', 'silent'): 11, ('most', 'young'): 11, ('big', 'little'): 11, ('good', 'real'): 11, ('funny', 'own'): 11, ('great', 'perfect'): 11, ('good', 'same'): 11, ('many', 'old'): 11, ('first', 'same'): 11, ('better', 'other'): 10, ('little', 'real'): 10, ('many', 'new'): 10, ('best', 'most'): 10, ('great', 'own'): 10, ('other', 'own'): 10, ('good', 'several'): 10, ('great', 'strong'): 10, ('little', 'true'): 10, ('few', 'many'): 10, ('only', 'other'): 10, ('great', 'most'): 10, ('other', 'real'): 10, ('funny', 'great'): 10, ('better', 'great'): 10, ('best', 'strong'): 10, ('big', 'short'): 10, ('big', 'real'): 10, ('entertaining', 'good'): 10, ('original', 'other'): 10, ('intelligent', 'southern'): 10, ('other', 'southern'): 10, ('southern', 'young'): 10, ('southern', 'strong'): 10, ('silent', 'sound'): 10, ('silent', 'such'): 10, ('greatest', 'silent'): 10, ('last', 'little'): 9, ('many', 'much'): 9, ('dead', 'own'): 9, ('purgatory', 'same'): 9, ('new', 'purgatory'): 9, ('impossible', 'purgatory'): 9, ('impossible', 'same'): 9, ('best', 'own'): 9, ('impossible', 'new'): 9, ('new', 'real'): 9, ('best', 'real'): 9, ('old', 'own'): 9, ('good', 'old'): 9, ('other', 'young'): 9, ('great', 'such'): 9, ('good', 'least'): 9, ('first', 'full'): 9, ('great', 'only'): 9, ('great', 'long'): 9, ('enough', 'great'): 9, ('other', 'realistic'): 9, ('old', 'same'): 9, ('full', 'little'): 9, ('few', 'same'): 9, ('great', 'original'): 9, ('real', 'short'): 9, ('good', 'last'): 8, ('least', 'more'): 8, ('more', 'same'): 8, ('more', 'own'): 8, ('more', 'new'): 8, ('European', 'little'): 8, ('much', 'same'): 8, ('most', 'same'): 8, ('real', 'same'): 8, ('new', 'own'): 8, ('American', 'young'): 8, ('few', 'first'): 8, ('good', 'young'): 8, ('many', 'strong'): 8, ('few', 'little'): 8, ('many', 'true'): 8, ('enough', 'few'): 8, ('little', 'main'): 8, ('big', 'such'): 8, ('big', 'most'): 8, ('bad', 'good'): 8, ('different', 'good'): 8, ('funny', 'little'): 8, ('same', 'young'): 8, ('brilliant', 'same'): 8, ('best', 'big'): 8, ('great', 'wonderful'): 8, ('best', 'better'): 8, ('new', 'strong'): 8, ('amazing', 'other'): 8, ('best', 'long'): 8, ('beautiful', 'best'): 8, ('best', 'dark'): 8, ('amazing', 'great'): 8, ('good', 'high'): 8, ('innocent', 'little'): 8, ('amusing', 'good'): 8, ('long', 'most'): 8, ('most', 'old'): 8, ('bad', 'better'): 8, ('short', 'such'): 8, ('better', 'much'): 7, ('more', 'much'): 7, ('best', 'more'): 7, ('-', 'little'): 7, ('close', 'little'): 7, ('dead', 'little'): 7, ('least', 'same'): 7, ('much', 'own'): 7, ('much', 'new'): 7, ('most', 'own'): 7, ('best', 'clean'): 7, ('American', 'old'): 7, ('old', 'other'): 7, ('own', 'young'): 7, ('first', 'own'): 7, ('important', 'most'): 7, ('good', 'memorable'): 7, ('few', 'only'): 7, ('little', 'sure'): 7, ('little', 'only'): 7, ('other', 'sure'): 7, ('many', 'such'): 7, ('bad', 'many'): 7, ('American', 'many'): 7, ('big', 'good'): 7, ('bad', 'most'): 7, ('few', 'most'): 7, ('different', 'other'): 7, ('great', 'short'): 7, ('better', 'many'): 7, ('other', 'various'): 7, ('best', 'true'): 7, ('same', 'various'): 7, ('same', 'true'): 7, ('great', 'true'): 7, ('charming', 'little'): 7, ('little', 'short'): 7, ('nice', 'short'): 7, ('low', 'other'): 7, ('good', 'interesting'): 7, ('good', 'true'): 7, ('better', 'entertaining'): 7, ('great', 'realistic'): 7, ('hot', 'nice'): 7, ('long', 'old'): 7, ('beautiful', 'many'): 7, ('fantastic', 'other'): 7, ('good', 'wild'): 7, ('effective', 'good'): 7, ('original', 'same'): 7, ('fantastic', 'original'): 7, ('full', 'same'): 7, ('high', 'other'): 7, ('good', 'special'): 7, ('most', 'strong'): 7, ('most', 'short'): 7, ('best', 'comic'): 7, ('comic', 'little'): 7, ('many', 'serious'): 7, ('new', 'realistic'): 7, ('anti', 'little'): 7, ('amazing', 'realistic'): 7, ('last', 'much'): 6, ('classic', 'much'): 6, ('classic', 'good'): 6, ('better', 'little'): 6, ('-', 'more'): 6, ('close', 'more'): 6, ('entertaining', 'many'): 6, ('little', 'new'): 6, ('European', 'many'): 6, ('dead', 'many'): 6, ('many', 'purgatory'): 6, ('impossible', 'many'): 6, ('best', 'close'): 6, ('dead', 'purgatory'): 6, ('dead', 'same'): 6, ('dead', 'new'): 6, ('dead', 'impossible'): 6, ('own', 'purgatory'): 6, ('best', 'purgatory'): 6, ('general', 'same'): 6, ('general', 'own'): 6, ('impossible', 'own'): 6, ('best', 'general'): 6, ('best', 'impossible'): 6, ('old', 'silent'): 6, ('good', 'new'): 6, ('good', 'pretty'): 6, ('brilliant', 'many'): 6, ('amazing', 'many'): 6, ('able', 'many'): 6, ('many', 'non'): 6, ('many', 'sure'): 6, ('many', 'sensitive'): 6, ('many', 'only'): 6, ('other', 'true'): 6, ('brilliant', 'little'): 6, ('famous', 'little'): 6, ('important', 'many'): 6, ('big', 'famous'): 6, ('important', 'other'): 6, ('bad', 'other'): 6, ('able', 'famous'): 6, ('own', 'such'): 6, ('classic', 'famous'): 6, ('bad', 'great'): 6, ('famous', 'much'): 6, ('great', 'local'): 6, ('great', 'last'): 6, ('American', 'most'): 6, ('little', 'young'): 6, ('funny', 'more'): 6, ('many', 'subtle'): 6, ('corrupt', 'ordinary'): 6, ('Estonian', 'ordinary'): 6, ('better', 'first'): 6, ('entire', 'real'): 6, ('first', 'real'): 6, ('entire', 'good'): 6, ('great', 'more'): 6, ('better', 'same'): 6, ('better', 'new'): 6, ('beautiful', 'strong'): 6, ('best', 'emotional'): 6, ('beautiful', 'true'): 6, ('new', 'true'): 6, ('easy', 'good'): 6, ('only', 'own'): 6, ('most', 'only'): 6, ('more', 'such'): 6, ('high', 'own'): 6, ('low', 'own'): 6, ('funny', 'such'): 6, ('good', 'innocent'): 6, ('good', 'wonderful'): 6, ('dramatic', 'good'): 6, ('new', 'original'): 6, ('great', 'worse'): 6, ('beautiful', 'good'): 6, ('long', 'many'): 6, ('big', 'right'): 6, ('only', 'strong'): 6, ('first', 'more'): 6, ('brilliant', 'original'): 6, ('high', 'same'): 6, ('intelligent', 'many'): 6, ('intelligent', 'most'): 6, ('realistic', 'strong'): 6, ('American', 'best'): 6, ('funny', 'perfect'): 6, ('good', 'perfect'): 6, ('only', 'real'): 6, ('greatest', 'old'): 6, ('best', 'greatest'): 6, ('comic', 'good'): 6, ('complex', 'little'): 6, ('average', 'good'): 6, ('good', 'ready'): 6, ('little', 'realistic'): 6, ('difficult', 'little'): 6, ('big', 'sound'): 6, ('big', 'greatest'): 6, ('silent', 'special'): 6, ('real', 'sound'): 6, ('short', 'sound'): 6, ('real', 'such'): 6, ('greatest', 'real'): 6, ('greatest', 'short'): 6, ('half', 'little'): 6, ('comic', 'nice'): 6, ('married', 'nice'): 6, ('desperate', 'new'): 6, ('dated', 'new'): 6, ('better', 'cheap'): 6, ('much', 'older'): 5, ('last', 'older'): 5, ('last', 'younger'): 5, ('classic', 'little'): 5, ('dead', 'more'): 5, ('more', 'most'): 5, ('more', 'real'): 5, ('more', 'ready'): 5, ('entertaining', 'same'): 5, ('-', 'same'): 5, ('general', 'modern'): 5, ('initial', 'little'): 5, ('little', 'sexual'): 5, ('little', 'ready'): 5, ('least', 'many'): 5, ('least', 'own'): 5, ('many', 'religious'): 5, ('fantastic', 'own'): 5, ('European', 'same'): 5, ('close', 'many'): 5, ('many', 'single'): 5, ('many', 'ready'): 5, ('close', 'same'): 5, ('mere', 'own'): 5, ('clean', 'same'): 5, ('same', 'single'): 5, ('general', 'new'): 5, ('general', 'old'): 5, ('few', 'old'): 5, ('American', 'great'): 5, ('American', 'other'): 5, ('American', 'good'): 5, ('great', 'young'): 5, ('first', 'great'): 5, ('general', 'other'): 5, ('general', 'young'): 5, ('other', 'worth'): 5, ('important', 'young'): 5, ('great', 'similar'): 5, ('able', 'good'): 5, ('alive', 'good'): 5, ('able', 'few'): 5, ('clear', 'little'): 5, ('little', 'sweet'): 5, ('historical', 'many'): 5, ('few', 'much'): 5, ('European', 'other'): 5, ('more', 'only'): 5, ('other', 'physical'): 5, ('bad', 'little'): 5, ('American', 'little'): 5, ('big', 'own'): 5, ('bad', 'big'): 5, ('main', 'other'): 5, ('able', 'great'): 5, ('most', 'such'): 5, ('brilliant', 'much'): 5, ('brilliant', 'last'): 5, ('bad', 'much'): 5, ('good', 'main'): 5, ('female', 'first'): 5, ('different', 'strong'): 5, ('little', 'next'): 5, ('little', 'obvious'): 5, ('incredible', 'same'): 5, ('brilliant', 'more'): 5, ('favorite', 'funny'): 5, ('funny', 'same'): 5, ('same', 'sure'): 5, ('dirty', 'young'): 5, ('best', 'dirty'): 5, ('different', 'most'): 5, ('interesting', 'short'): 5, ('better', 'entire'): 5, ('better', 'strong'): 5, ('important', 'strong'): 5, ('difficult', 'real'): 5, ('real', 'strong'): 5, ('many', 'simple'): 5, ('other', 'wonderful'): 5, ('funny', 'other'): 5, ('best', 'perfect'): 5, ('great', 'serious'): 5, ('long', 'other'): 5, ('beautiful', 'other'): 5, ('best', 'important'): 5, ('amazing', 'best'): 5, ('long', 'same'): 5, ('bold', 'great'): 5, ('beautiful', 'same'): 5, ('great', 'technical'): 5, ('charming', 'good'): 5, ('easy', 'little'): 5, ('low', 'more'): 5, ('innocent', 'more'): 5, ('high', 'little'): 5, ('charismatic', 'own'): 5, ('good', 'romantic'): 5, ('innocent', 'such'): 5, ('greatest', 'such'): 5, ('dead', 'good'): 5, ('little', 'smart'): 5, ('better', 'funny'): 5, ('bad', 'same'): 5, ('fresh', 'good'): 5, ('great', 'high'): 5, ('many', 'social'): 5, ('few', 'high'): 5, ('other', 'silly'): 5, ('first', 'nice'): 5, ('many', 'surprised'): 5, ('bad', 'old'): 5, ('absolute', 'other'): 5, ('good', 'worse'): 5, ('bad', 'fantastic'): 5, ('first', 'low'): 5, ('old', 'right'): 5, ('little', 'right'): 5, ('funny', 'old'): 5, ('big', 'old'): 5, ('brilliant', 'first'): 5, ('last', 'original'): 5, ('other', 'pure'): 5, ('great', 'top'): 5, ('sensitive', 'southern'): 5, ('social', 'southern'): 5, ('civil', 'southern'): 5, ('southern', 'subtle'): 5, ('realistic', 'southern'): 5, ('inside', 'southern'): 5, ('non', 'southern'): 5, ('-', 'southern'): 5, ('slave', 'southern'): 5, ('common', 'southern'): 5, ('dull', 'southern'): 5, ('historical', 'southern'): 5, ('simple', 'southern'): 5, ('confederate', 'southern'): 5, ('brilliant', 'southern'): 5, ('important', 'southern'): 5, ('southern', 'third'): 5, ('liberal', 'southern'): 5, ('skilled', 'southern'): 5, ('modest', 'southern'): 5, ('close', 'southern'): 5, ('southern', 'tough'): 5, ('drunk', 'southern'): 5, ('negligent', 'southern'): 5, ('inseparable', 'southern'): 5, ('best', 'southern'): 5, ('different', 'southern'): 5, ('long', 'southern'): 5, ('intense', 'southern'): 5, ('passionate', 'southern'): 5, ('only', 'southern'): 5, ('pure', 'southern'): 5, ('same', 'southern'): 5, ('old', 'southern'): 5, ('powerful', 'southern'): 5, ('breathtaking', 'southern'): 5, ('beautiful', 'southern'): 5, ('Appalachian', 'southern'): 5, ('broad', 'southern'): 5, ('educational', 'southern'): 5, ('middle', 'southern'): 5, ('lower', 'southern'): 5, ('short', 'southern'): 5, ('American', 'southern'): 5, ('few', 'southern'): 5, ('gifted', 'southern'): 5, ('southern', 'vocal'): 5, ('outstanding', 'southern'): 5, ('most', 'third'): 5, ('most', 'outstanding'): 5, ('long', 'short'): 5, ('best', 'original'): 5, ('best', 'funny'): 5, ('enough', 'same'): 5, ('good', 'whole'): 5, ('crazy', 'good'): 5, ('clean', 'old'): 5, ('cinematic', 'old'): 5, ('fresh', 'old'): 5, ('dark', 'old'): 5, ('personal', 'young'): 5, ('comic', 'young'): 5, ('best', 'cinematic'): 5, ('best', 'fresh'): 5, ('better', 'worth'): 5, ('bad', 'worth'): 5, ('bad', 'sure'): 5, ('hard', 'little'): 5, ('good', 'sweet'): 5, ('good', 'preachy'): 5, ('nice', 'such'): 5, ('amazing', 'good'): 5, ('best', 'such'): 5, ('old', 'perfect'): 5, ('old', 'such'): 5, ('best', 'first'): 5, ('first', 'worth'): 5, ('direct', 'silent'): 5, ('1st', 'silent'): 5, ('early', 'silent'): 5, ('little', 'silent'): 5, ('silent', 'top'): 5, ('familiar', 'silent'): 5, ('Later', 'silent'): 5, ('silent', 'similar'): 5, ('glorious', 'silent'): 5, ('pristine', 'silent'): 5, ('silent', 'uphill'): 5, ('positive', 'silent'): 5, ('double', 'silent'): 5, ('silent', 'technical'): 5, ('most', 'silent'): 5, ('fortunate', 'silent'): 5, ('instrumental', 'silent'): 5, ('best', 'silent'): 5, ('difficult', 'silent'): 5, ('entire', 'silent'): 5, ('silent', 'wise'): 5, ('selective', 'silent'): 5, ('next', 'silent'): 5, ('silent', 'subject'): 5, ('right', 'silent'): 5, ('popular', 'silent'): 5, ('Punctured', 'silent'): 5, ('concrete', 'silent'): 5, ('long', 'silent'): 5, ('silent', 'silver'): 5, ('silent', 'subdued'): 5, ('early', 'short'): 5, ('female', 'little'): 5, ('best', 'low'): 5, ('much', 'younger'): 4, ('better', 'last'): 4, ('older', 'other'): 4, ('favourite', 'good'): 4, ('better', 'classic'): 4, ('entertaining', 'more'): 4, ('European', 'more'): 4, ('general', 'more'): 4, ('entertaining', 'new'): 4, ('best', 'entertaining'): 4, ('many', 'rich'): 4, ('rich', 'same'): 4, ('-', 'most'): 4, ('-', 'new'): 4, ('modern', 'own'): 4, ('fantastic', 'little'): 4, ('impossible', 'little'): 4, ('fantastic', 'least'): 4, ('least', 'new'): 4, ('least', 'real'): 4, ('new', 'religious'): 4, ('best', 'religious'): 4, ('human', 'same'): 4, ('fantastic', 'much'): 4, ('dead', 'much'): 4, ('most', 'much'): 4, ('best', 'much'): 4, ('fantastic', 'many'): 4, ('fantastic', 'new'): 4, ('best', 'fantastic'): 4, ('contemporary', 'same'): 4, ('European', 'best'): 4, ('many', 'predictable'): 4, ('many', 'possible'): 4, ('many', 'real'): 4, ('close', 'most'): 4, ('close', 'new'): 4, ('own', 'possible'): 4, ('new', 'possible'): 4, ('best', 'possible'): 4, ('best', 'dead'): 4, ('dead', 'real'): 4, ('initial', 'same'): 4, ('same', 'sexual'): 4, ('less', 'same'): 4, ('most', 'new'): 4, ('impossible', 'most'): 4, ('most', 'real'): 4, ('huge', 'own'): 4, ('own', 'real'): 4, ('own', 'single'): 4, ('huge', 'new'): 4, ('general', 'huge'): 4, ('new', 'ready'): 4, ('best', 'initial'): 4, ('best', 'sexual'): 4, ('best', 'less'): 4, ('real', 'sexual'): 4, ('American', 'few'): 4, ('American', 'own'): 4, ('general', 'great'): 4, ('few', 'general'): 4, ('first', 'general'): 4, ('few', 'own'): 4, ('few', 'young'): 4, ('handsome', 'own'): 4, ('other', 'several'): 4, ('major', 'other'): 4, ('good', 'handsome'): 4, ('brilliant', 'own'): 4, ('bad', 'best'): 4, ('low', 'young'): 4, ('surprised', 'young'): 4, ('brilliant', 'great'): 4, ('good', 'strong'): 4, ('happy', 'many'): 4, ('linear', 'little'): 4, ('confusing', 'little'): 4, ('enough', 'little'): 4, ('little', 'loud'): 4, ('many', 'slow'): 4, ('linear', 'many'): 4, ('confusing', 'many'): 4, ('clear', 'many'): 4, ('interested', 'many'): 4, ('major', 'many'): 4, ('few', 'more'): 4, ('few', 'sure'): 4, ('-', 'non'): 4, ('non', 'other'): 4, ('-', 'other'): 4, ('confusing', 'linear'): 4, ('clear', 'linear'): 4, ('linear', 'sure'): 4, ('European', 'sure'): 4, ('more', 'true'): 4, ('clear', 'confusing'): 4, ('confusing', 'sure'): 4, ('clear', 'sure'): 4, ('much', 'sure'): 4, ('more', 'sure'): 4, ('sure', 'sweet'): 4, ('enough', 'other'): 4, ('other', 'sensitive'): 4, ('able', 'little'): 4, ('big', 'many'): 4, ('gifted', 'many'): 4, ('local', 'many'): 4, ('big', 'other'): 4, ('able', 'big'): 4, ('big', 'important'): 4, ('big', 'classic'): 4, ('big', 'main'): 4, ('big', 'much'): 4, ('able', 'other'): 4, ('able', 'own'): 4, ('able', 'classic'): 4, ('able', 'much'): 4, ('gifted', 'most'): 4, ('great', 'important'): 4, ('brilliant', 'few'): 4, ('brilliant', 'most'): 4, ('classic', 'great'): 4, ('great', 'much'): 4, ('few', 'local'): 4, ('good', 'local'): 4, ('sensual', 'younger'): 4, ('dark', 'good'): 4, ('female', 'good'): 4, ('good', 'sexual'): 4, ('enough', 'good'): 4, ('first', 'sexual'): 4, ('more', 'young'): 4, ('American', 'same'): 4, ('funny', 'sad'): 4, ('funny', 'many'): 4, ('obvious', 'same'): 4, ('comedic', 'great'): 4, ('many', 'mighty'): 4, ('new', 'numerous'): 4, ('Estonian', 'corrupt'): 4, ('different', 'many'): 4, ('different', 'new'): 4, ('beautiful', 'excellent'): 4, ('excellent', 'great'): 4, ('great', 'strange'): 4, ('beautiful', 'own'): 4, ('least', 'other'): 4, ('better', 'real'): 4, ('better', 'subtle'): 4, ('other', 'simple'): 4, ('difficult', 'other'): 4, ('hard', 'other'): 4, ('necessary', 'other'): 4, ('many', 'wrong'): 4, ('first', 'half'): 4, ('entire', 'simple'): 4, ('simple', 'strong'): 4, ('respectful', 'simple'): 4, ('difficult', 'first'): 4, ('difficult', 'good'): 4, ('entire', 'respectful'): 4, ('-', 'good'): 4, ('good', 'hard'): 4, ('good', 'responsible'): 4, ('interesting', 'other'): 4, ('great', 'impressive'): 4, ('best', 'only'): 4, ('close', 'young'): 4, ('best', 'serious'): 4, ('last', 'more'): 4, ('long', 'strong'): 4, ('same', 'strong'): 4, ('amazing', 'strong'): 4, ('best', 'wonderful'): 4, ('best', 'technical'): 4, ('Young', 'great'): 4, ('beautiful', 'long'): 4, ('dark', 'great'): 4, ('beautiful', 'new'): 4, ('amazing', 'new'): 4, ('charming', 'low'): 4, ('favorite', 'good'): 4, ('high', 'most'): 4, ('funny', 'low'): 4, ('endless', 'own'): 4, ('amusing', 'own'): 4, ('own', 'private'): 4, ('greatest', 'own'): 4, ('clever', 'own'): 4, ('better', 'own'): 4, ('endless', 'good'): 4, ('good', 'private'): 4, ('good', 'proper'): 4, ('good', 'greatest'): 4, ('good', 'smart'): 4, ('greatest', 'little'): 4, ('wonderful', 'younger'): 4, ('fresh', 'little'): 4, ('little', 'nice'): 4, ('little', 'wonderful'): 4, ('big', 'same'): 4, ('funny', 'nice'): 4, ('big', 'funny'): 4, ('good', 'short'): 4, ('great', 'surf'): 4, ('great', 'least'): 4, ('low', 'real'): 4, ('little', 'traumatic'): 4, ('big', 'entire'): 4, ('good', 'traumatic'): 4, ('black', 'good'): 4, ('good', 'white'): 4, ('good', 'silly'): 4, ('beautiful', 'special'): 4, ('nice', 'true'): 4, ('first', 'true'): 4, ('last', 'nice'): 4, ('first', 'last'): 4, ('good', 'original'): 4, ('better', 'original'): 4, ('better', 'nice'): 4, ('good', 'surprised'): 4, ('glad', 'good'): 4, ('good', 'hot'): 4, ('great', 'nice'): 4, ('own', 'true'): 4, ('old', 'true'): 4, ('absolute', 'same'): 4, ('same', 'silly'): 4, ('interesting', 'nice'): 4, ('first', 'much'): 4, ('great', 'right'): 4, ('own', 'right'): 4, ('first', 'funny'): 4, ('final', 'true'): 4, ('full', 'own'): 4, ('final', 'own'): 4, ('big', 'wise'): 4, ('much', 'original'): 4, ('first', 'original'): 4, ('first', 'various'): 4, ('first', 'high'): 4, ('fantastic', 'first'): 4, ('original', 'various'): 4, ('full', 'original'): 4, ('more', 'original'): 4, ('final', 'same'): 4, ('pure', 'same'): 4, ('other', 'slapstick'): 4, ('apparent', 'other'): 4, ('brilliant', 'various'): 4, ('fantastic', 'various'): 4, ('full', 'various'): 4, ('brilliant', 'fantastic'): 4, ('brilliant', 'full'): 4, ('fantastic', 'full'): 4, ('few', 'full'): 4, ('fresh', 'most'): 4, ('most', 'top'): 4, ('inventive', 'most'): 4, ('little', 'top'): 4, ('inventive', 'little'): 4, ('great', 'special'): 4, ('intelligent', 'other'): 4, ('intelligent', 'young'): 4, ('intelligent', 'strong'): 4, ('many', 'realistic'): 4, ('most', 'realistic'): 4, ('close', 'other'): 4, ('other', 'powerful'): 4, ('beautiful', 'young'): 4, ('strong', 'young'): 4, ('best', 'short'): 4, ('old', 'short'): 4, ('old', 'outstanding'): 4, ('powerful', 'strong'): 4, ('original', 'perfect'): 4, ('funny', 'original'): 4, ('great', 'whole'): 4, ('enough', 'real'): 4, ('few', 'real'): 4, ('new', 'only'): 4, ('best', 'early'): 4, ('old', 'pleased'): 4, ('pleased', 'young'): 4, ('best', 'pleased'): 4, ('greatest', 'young'): 4, ('old', 'personal'): 4, ('conflicting', 'old'): 4, ('further', 'old'): 4, ('old', 'potential'): 4, ('evil', 'old'): 4, ('dirty', 'old'): 4, ('old', 'visual'): 4, ('lasting', 'old'): 4, ('comic', 'old'): 4, ('conflicting', 'young'): 4, ('further', 'young'): 4, ('potential', 'young'): 4, ('evil', 'young'): 4, ('clean', 'young'): 4, ('visual', 'young'): 4, ('cinematic', 'young'): 4, ('fresh', 'young'): 4, ('lasting', 'young'): 4, ('dark', 'young'): 4, ('best', 'personal'): 4, ('best', 'conflicting'): 4, ('best', 'further'): 4, ('best', 'potential'): 4, ('best', 'evil'): 4, ('best', 'visual'): 4, ('best', 'lasting'): 4, ('bad', 'dark'): 4, ('Beguiled', 'bad'): 4, ('bad', 'sweet'): 4, ('bad', 'high'): 4, ('most', 'right'): 4, ('Beguiled', 'most'): 4, ('early', 'most'): 4, ('most', 'sweet'): 4, ('Beguiled', 'sweet'): 4, ('big', 'early'): 4, ('cheap', 'long'): 4, ('big', 'long'): 4, ('available', 'more'): 4, ('nice', 'sweet'): 4, ('good', 'subliminal'): 4, ('good', 'thin'): 4, ('good', 'scary'): 4, ('amusing', 'nice'): 4, ('nice', 'perfect'): 4, ('catchy', 'good'): 4, ('engaging', 'good'): 4, ('good', 'hilarious'): 4, ('emphatic', 'good'): 4, ('good', 'intelligent'): 4, ('good', 'scared'): 4, ('good', 'worthy'): 4, ('good', 'solid'): 4, ('good', 'superb'): 4, ('good', 'unrecognisable'): 4, ('perfect', 'such'): 4, ('many', 'several'): 4, ('High', 'many'): 4, ('amazing', 'little'): 4, ('good', 'serious'): 4, ('about', 'more'): 4, ('true', 'young'): 4, ('same', 'wild'): 4, ('long', 'such'): 4, ('old', 'rare'): 4, ('perfect', 'rare'): 4, ('great', 'rare'): 4, ('funny', 'rare'): 4, ('available', 'funny'): 4, ('funny', 'hysterical'): 4, ('next', 'such'): 4, ('new', 'next'): 4, ('difficult', 'short'): 4, ('right', 'short'): 4, ('/>My', 'good'): 4, ('good', 'impossible'): 4, ('last', 'new'): 4, ('older', 'true'): 4, ('Tomanian', 'heroic'): 4, ('Jewish', 'Tomanian'): 4, ('more', 'ridiculous'): 4, ('Jewish', 'heroic'): 4, ('entertaining', 'first'): 4, ('entertaining', 'worth'): 4, ('good', 'worth'): 4, ('real', 'right'): 4, ('old', 'real'): 4, ('big', 'special'): 4, ('early', 'such'): 4, ('great', 'sound'): 4, ('sound', 'such'): 4, ('greatest', 'sound'): 4, ('great', 'greatest'): 4, ('real', 'similar'): 4, ('next', 'real'): 4, ('funny', 'modern'): 4, ('funny', 'general'): 4, ('near', 'realistic'): 4, ('amazing', 'perfect'): 4, ('easy', 'perfect'): 4, ('perfect', 'realistic'): 4, ('amazing', 'easy'): 4, ('easy', 'realistic'): 4, ('comic', 'short'): 4, ('married', 'short'): 4, ('hot', 'short'): 4, ('comic', 'married'): 4, ('comic', 'hot'): 4, ('comic', 'first'): 4, ('hot', 'married'): 4, ('desperate', 'realistic'): 4, ('dated', 'realistic'): 4, ('desperate', 'other'): 4, ('desperate', 'strong'): 4, ('dated', 'desperate'): 4, ('new', 'typical'): 4, ('cool', 'other'): 4, ('dated', 'other'): 4, ('dated', 'strong'): 4, ('amazing', 'convincing'): 4, ('amazing', 'bad'): 4, ('convincing', 'other'): 4, ('bad', 'convincing'): 4, ('other', 'sexual'): 4, ('Christian', 'surprised'): 4, ('serious', 'surprised'): 4, ...})
In [6]:
# ネガティブレビューの共起ペア生成
cooccurrence_negative = generate_cooccurrence(review_negative['tokens'])
cooccurrence_negative
Out[6]:
Counter({('bad', 'good'): 51, ('bad', 'terrible'): 27, ('good', 'red'): 24, ('good', 'terrible'): 23, ('better', 'good'): 23, ('good', 'many'): 23, ('good', 'other'): 22, ('main', 'red'): 21, ('bad', 'same'): 20, ('bad', 'first'): 20, ('good', 'little'): 19, ('good', 'such'): 19, ('bad', 'better'): 18, ('funny', 'many'): 18, ('first', 'good'): 17, ('bad', 'many'): 17, ('bad', 'last'): 16, ('bad', 'other'): 16, ('good', 'great'): 16, ('good', 'stupid'): 16, ('evil', 'such'): 16, ('good', 'more'): 16, ('good', 'real'): 16, ('better', 'many'): 16, ('genetic', 'psychical'): 16, ('bad', 'little'): 15, ('bad', 'particular'): 15, ('awful', 'red'): 15, ('modern', 'red'): 15, ('bad', 'whole'): 14, ('bad', 'worst'): 14, ('good', 'same'): 14, ('-', 'real'): 14, ('good', 'least'): 14, ('awful', 'bad'): 14, ('bad', 'stupid'): 14, ('bad', 'new'): 14, ('bad', 'funny'): 14, ('good', 'main'): 14, ('big', 'red'): 14, ('evil', 'red'): 14, ('more', 'same'): 14, ('first', 'more'): 14, ('black', 'red'): 14, ('red', 'such'): 14, ('emotional', 'red'): 14, ('baroque', 'red'): 14, ('famous', 'red'): 14, ('good', 'young'): 14, ('terrible', 'worst'): 13, ('bad', 'interesting'): 13, ('good', 'last'): 13, ('real', 'true'): 13, ('big', 'good'): 13, ('better', 'funny'): 13, ('many', 'more'): 13, ('good', 'interesting'): 12, ('better', 'other'): 12, ('first', 'great'): 12, ('funny', 'good'): 12, ('good', 'much'): 12, ('awful', 'good'): 12, ('first', 'same'): 12, ('bad', 'horrible'): 12, ('good', 'horrible'): 12, ('first', 'little'): 12, ('new', 'same'): 12, ('evil', 'great'): 12, ('bad', 'more'): 12, ('evil', 'good'): 12, ('criminal', 'real'): 12, ('least', 'real'): 12, ('Nazi', 'real'): 12, ('first', 'sexy'): 12, ('more', 'sexy'): 12, ('many', 'such'): 12, ('good', 'hard'): 11, ('best', 'good'): 11, ('bad', 'great'): 11, ('first', 'sure'): 11, ('good', 'new'): 11, ('-', 'good'): 11, ('first', 'red'): 11, ('bad', 'best'): 10, ('bad', 'worse'): 10, ('old', 'other'): 10, ('first', 'other'): 10, ('only', 'special'): 10, ('emotional', 'good'): 10, ('first', 'much'): 10, ('good', 'special'): 10, ('bad', 'slow'): 10, ('better', 'great'): 10, ('ridiculous', 'such'): 10, ('main', 'more'): 10, ('main', 'same'): 10, ('bad', 'red'): 10, ('more', 'red'): 10, ('emotional', 'such'): 10, ('red', 'same'): 10, ('better', 'more'): 10, ('hard', 'terrible'): 9, ('better', 'terrible'): 9, ('good', 'worst'): 9, ('first', 'small'): 9, ('good', 'own'): 9, ('-', 'bad'): 9, ('bad', 'such'): 9, ('awful', 'same'): 9, ('other', 'same'): 9, ('good', 'only'): 9, ('bad', 'different'): 9, ('good', 'second'): 9, ('bad', 'only'): 9, ('little', 'only'): 9, ('many', 'real'): 9, ('horrible', 'real'): 9, ('great', 'many'): 9, ('bad', 'small'): 8, ('bad', 'convincing'): 8, ('first', 'interesting'): 8, ('bad', 'serious'): 8, ('bad', 'own'): 8, ('bad', 'sure'): 8, ('high', 'least'): 8, ('least', 'slow'): 8, ('least', 'same'): 8, ('first', 'many'): 8, ('black', 'emotional'): 8, ('same', 'slow'): 8, ('first', 'special'): 8, ('black', 'good'): 8, ('good', 'tough'): 8, ('first', 'last'): 8, ('last', 'little'): 8, ('big', 'much'): 8, ('much', 'red'): 8, ('last', 'main'): 8, ('last', 'red'): 8, ('big', 'main'): 8, ('fantastic', 'such'): 8, ('red', 'ridiculous'): 8, ('own', 'such'): 8, ('own', 'red'): 8, ('interesting', 'red'): 8, ('great', 'other'): 8, ('new', 'red'): 8, ('original', 'red'): 8, ('-', 'red'): 8, ('good', 'most'): 8, ('most', 'real'): 8, ('horrible', 'other'): 8, ('absurd', 'many'): 8, ('sexy', 'small'): 8, ('sexy', 'silly'): 8, ('Sunny', 'sexy'): 8, ('sexy', 'wounded'): 8, ('medical', 'sexy'): 8, ('azure', 'beautiful'): 8, ('beautiful', 'female'): 8, ('bad', 'hot'): 8, ('bad', 'clear'): 8, ('least', 'terrible'): 8, ('bad', 'insane'): 8, ('Spanish', 'main'): 8, ('main', 'pretty'): 8, ('genetic', 'whole'): 8, ('psychical', 'whole'): 8, ('genetic', 'psychic'): 8, ('psychic', 'psychical'): 8, ('bad', 'genetic'): 8, ('bad', 'psychical'): 8, ('terrible', 'whole'): 7, ('bad', 'hard'): 7, ('better', 'hard'): 7, ('better', 'interesting'): 7, ('last', 'same'): 7, ('OK', 'black'): 7, ('great', 'interesting'): 7, ('first', 'own'): 7, ('good', 'short'): 7, ('little', 'much'): 7, ('bad', 'familiar'): 7, ('Bad', 'bad'): 7, ('bad', 'black'): 7, ('lazy', 'real'): 7, ('first', 'high'): 7, ('awful', 'other'): 7, ('awful', 'first'): 7, ('awful', 'black'): 7, ('funny', 'other'): 7, ('many', 'same'): 7, ('first', 'only'): 7, ('old', 'same'): 7, ('first', 'slow'): 7, ('boring', 'first'): 7, ('first', 'silly'): 7, ('poor', 'same'): 7, ('awful', 'such'): 7, ('little', 'stupid'): 7, ('dimensional', 'evil'): 7, ('evil', 'much'): 7, ('evil', 'first'): 7, ('evil', 'horrible'): 7, ('20th', 'evil'): 7, ('evil', 'sympathetic'): 7, ('boring', 'great'): 7, ('glossy', 'red'): 7, ('more', 'much'): 7, ('impuissant', 'red'): 7, ('interested', 'main'): 7, ('interested', 'red'): 7, ('bizarre', 'red'): 7, ('red', 'very'): 7, ('fortunate', 'red'): 7, ('red', 'tenuous'): 7, ('red', 'tremendous'): 7, ('latter', 'red'): 7, ('general', 'main'): 7, ('general', 'red'): 7, ('average', 'red'): 7, ('quotidian', 'red'): 7, ('impressive', 'red'): 7, ('grotesque', 'red'): 7, ('fantastic', 'red'): 7, ('good', 'ridiculous'): 7, ('evil', 'main'): 7, ('milquetoast', 'red'): 7, ('final', 'red'): 7, ('corporate', 'red'): 7, ('red', 'spiritual'): 7, ('red', 'religious'): 7, ('altruistic', 'red'): 7, ('red', 'sympathetic'): 7, ('blunt', 'red'): 7, ('dimensional', 'red'): 7, ('modern', 'more'): 7, ('more', 'other'): 7, ('good', 'modern'): 7, ('elderly', 'red'): 7, ('confused', 'red'): 7, ('definite', 'red'): 7, ('indispensable', 'red'): 7, ('modern', 'same'): 7, ('funerary', 'red'): 7, ('red', 'suicidal'): 7, ('red', 'simple'): 7, ('red', 'second'): 7, ('other', 'red'): 7, ('appropriate', 'red'): 7, ('red', 'sacred'): 7, ('extreme', 'red'): 7, ('inherent', 'red'): 7, ('bold', 'red'): 7, ('receptive', 'red'): 7, ('indulgent', 'red'): 7, ('red', 'risky'): 7, ('red', 'vulnerable'): 7, ('enormous', 'red'): 7, ('earliest', 'red'): 7, ('dead', 'red'): 7, ('Hebrew', 'red'): 7, ('anachronistic', 'red'): 7, ('accepted', 'red'): 7, ('mysterious', 'red'): 7, ('frieze', 'red'): 7, ('complete', 'red'): 7, ('red', 'resplendent'): 7, ('formal', 'red'): 7, ('musical', 'red'): 7, ('quasi', 'red'): 7, ('great', 'red'): 7, ('red', 'restive'): 7, ('absolute', 'red'): 7, ('best', 'red'): 7, ('masterful', 'red'): 7, ('dramatic', 'red'): 7, ('poignant', 'red'): 7, ('alive', 'red'): 7, ('minor', 'red'): 7, ('first', 'new'): 7, ('brutal', 'real'): 7, ('big', 'least'): 7, ('horrible', 'many'): 7, ('funny', 'more'): 7, ('big', 'little'): 7, ('bad', 'pathetic'): 7, ('bad', 'young'): 7, ('bad', 'major'): 7, ('bad', 'perfect'): 6, ('terrible', 'top'): 6, ('good', 'top'): 6, ('good', 'whole'): 6, ('better', 'whole'): 6, ('good', 'small'): 6, ('OK', 'good'): 6, ('interesting', 'same'): 6, ('bad', 'decent'): 6, ('generous', 'plastic'): 6, ('funny', 'stupid'): 6, ('funny', 'plastic'): 6, ('convincing', 'good'): 6, ('funny', 'great'): 6, ('interesting', 'own'): 6, ('Original', 'bad'): 6, ('little', 'many'): 6, ('bad', 'much'): 6, ('bad', 'predictable'): 6, ('black', 'first'): 6, ('better', 'first'): 6, ('-', 'true'): 6, ('black', 'fair'): 6, ('bad', 'original'): 6, ('high', 'slow'): 6, ('OK', 'high'): 6, ('good', 'high'): 6, ('awful', 'least'): 6, ('least', 'only'): 6, ('OK', 'least'): 6, ('least', 'special'): 6, ('least', 'much'): 6, ('other', 'whole'): 6, ('many', 'whole'): 6, ('awful', 'emotional'): 6, ('awful', 'slow'): 6, ('only', 'other'): 6, ('OK', 'emotional'): 6, ('OK', 'slow'): 6, ('slow', 'special'): 6, ('good', 'slow'): 6, ('OK', 'special'): 6, ('OK', 'fancy'): 6, ('much', 'special'): 6, ('good', 'single'): 6, ('average', 'good'): 6, ('much', 'same'): 6, ('black', 'same'): 6, ('first', 'such'): 6, ('awful', 'terrible'): 6, ('bad', 'female'): 6, ('few', 'good'): 6, ('second', 'stupid'): 6, ('bad', 'second'): 6, ('first', 'second'): 6, ('little', 'second'): 6, ('bad', 'single'): 6, ('bad', 'special'): 6, ('first', 'tough'): 6, ('first', 'horrible'): 6, ('little', 'special'): 6, ('many', 'new'): 6, ('infamous', 'same'): 6, ('great', 'much'): 6, ('first', 'ridiculous'): 6, ('bad', 'entire'): 6, ('new', 'own'): 6, ('great', 'new'): 6, ('much', 'such'): 6, ('awful', 'main'): 6, ('big', 'such'): 6, ('bad', 'latter'): 6, ('black', 'main'): 6, ('main', 'modern'): 6, ('main', 'such'): 6, ('emotional', 'main'): 6, ('baroque', 'main'): 6, ('famous', 'main'): 6, ('emotional', 'evil'): 6, ('dead', 'good'): 6, ('baroque', 'good'): 6, ('famous', 'good'): 6, ('new', 'other'): 6, ('personal', 'real'): 6, ('enjoyment', 'real'): 6, ('good', 'successful'): 6, ('awful', 'real'): 6, ('miserable', 'real'): 6, ('least', 'true'): 6, ('real', 'short'): 6, ('real', 'successful'): 6, ('alcoholic', 'real'): 6, ('nice', 'real'): 6, ('high', 'real'): 6, ('/>These', 'real'): 6, ('beautiful', 'real'): 6, ('indulgent', 'real'): 6, ('narcissistic', 'real'): 6, ('big', 'real'): 6, ('beefy', 'real'): 6, ('real', 'skinny'): 6, ('early', 'real'): 6, ('Many', 'real'): 6, ('real', 'vulgar'): 6, ('larger', 'real'): 6, ('dumb', 'real'): 6, ('real', 'special'): 6, ('real', 'stupid'): 6, ('dead', 'real'): 6, ('lousy', 'real'): 6, ('-', 'least'): 6, ('beautiful', 'many'): 6, ('most', 'same'): 6, ('absurd', 'funny'): 6, ('absurd', 'better'): 6, ('special', 'such'): 6, ('bad', 'most'): 6, ('least', 'worst'): 6, ('more', 'small'): 6, ('Sunny', 'first'): 6, ('first', 'wounded'): 6, ('first', 'medical'): 6, ('more', 'silly'): 6, ('obvious', 'sexy'): 6, ('Sunny', 'more'): 6, ('bad', 'sad'): 6, ('more', 'wounded'): 6, ('more', 'sure'): 6, ('little', 'more'): 6, ('little', 'same'): 6, ('medical', 'more'): 6, ('good', 'wrong'): 6, ('hard', 'other'): 6, ('last', 'many'): 6, ('successful', 'young'): 6, ('many', 'young'): 6, ('boring', 'violent'): 6, ('clear', 'violent'): 6, ('bad', 'blue'): 6, ('friendly', 'violent'): 6, ('main', 'many'): 6, ('double', 'good'): 6, ('bad', 'surprised'): 6, ('evil', 'many'): 6, ('professional', 'terrible'): 6, ('such', 'wicked'): 6, ('equivalent', 'such'): 6, ('20th', 'such'): 6, ('such', 'sweet'): 6, ('human', 'such'): 6, ('pure', 'such'): 6, ('daily', 'such'): 6, ('constant', 'such'): 6, ('cruel', 'such'): 6, ('monstrous', 'such'): 6, ('insane', 'such'): 6, ('preposterous', 'such'): 6, ('ill', 'such'): 6, ('lost', 'such'): 6, ('Sane', 'such'): 6, ('addicted', 'such'): 6, ('real', 'such'): 6, ('horrible', 'such'): 6, ('legal', 'such'): 6, ('such', 'vast'): 6, ('deceitful', 'such'): 6, ('selfish', 'such'): 6, ('less', 'such'): 6, ('civil', 'such'): 6, ('rational', 'such'): 6, ('concerned', 'such'): 6, ('such', 'vile'): 6, ('intellectual', 'such'): 6, ('brutal', 'such'): 6, ('perfect', 'terrible'): 5, ('small', 'terrible'): 5, ('same', 'terrible'): 5, ('hard', 'worst'): 5, ('interesting', 'whole'): 5, ('better', 'worst'): 5, ('good', 'less'): 5, ('best', 'better'): 5, ('better', 'same'): 5, ('black', 'funny'): 5, ('OK', 'funny'): 5, ('funny', 'old'): 5, ('good', 'proper'): 5, ('good', 'worth'): 5, ('first', 'funny'): 5, ('complete', 'good'): 5, ('good', 'particular'): 5, ('bad', 'lousy'): 5, ('bad', 'cheesy'): 5, ('bad', 'super'): 5, ('bad', 'real'): 5, ('interesting', 'sure'): 5, ('bad', 'old'): 5, ('gay', 'old'): 5, ('short', 'special'): 5, ('high', 'old'): 5, ('high', 'special'): 5, ('high', 'same'): 5, ('special', 'top'): 5, ('awful', 'whole'): 5, ('predictable', 'whole'): 5, ('first', 'whole'): 5, ('many', 'other'): 5, ('black', 'other'): 5, ('many', 'subtle'): 5, ('boring', 'many'): 5, ('much', 'only'): 5, ('first', 'old'): 5, ('black', 'slow'): 5, ('first', 'poor'): 5, ('good', 'odd'): 5, ('decent', 'much'): 5, ('black', 'special'): 5, ('awful', 'nice'): 5, ('bad', 'embarrassing'): 5, ('bad', 'easy'): 5, ('impossible', 'terrible'): 5, ('basic', 'same'): 5, ('different', 'same'): 5, ('few', 'predictable'): 5, ('last', 'second'): 5, ('special', 'stupid'): 5, ('horrible', 'stupid'): 5, ('bad', 'tough'): 5, ('good', 'obvious'): 5, ('little', 'tough'): 5, ('ridiculous', 'same'): 5, ('same', 'unlikely'): 5, ('evil', 'simplistic'): 5, ('deep', 'evil'): 5, ('boring', 'evil'): 5, ('confusing', 'evil'): 5, ('evil', 'inept'): 5, ('evil', 'phsycotic'): 5, ('great', 'little'): 5, ('great', 'main'): 5, ('interesting', 'new'): 5, ('long', 'more'): 5, ('main', 'much'): 5, ('big', 'last'): 5, ('last', 'new'): 5, ('awful', 'big'): 5, ('bad', 'big'): 5, ('big', 'more'): 5, ('big', 'interesting'): 5, ('big', 'other'): 5, ('big', 'first'): 5, ('fantastic', 'good'): 5, ('main', 'other'): 5, ('interesting', 'more'): 5, ('great', 'more'): 5, ('good', 'indulgent'): 5, ('dramatic', 'good'): 5, ('black', 'such'): 5, ('black', 'great'): 5, ('first', 'modern'): 5, ('original', 'same'): 5, ('good', 'true'): 5, ('beautiful', 'nice'): 5, ('least', 'most'): 5, ('Many', 'many'): 5, ('horrible', 'more'): 5, ('best', 'many'): 5, ('many', 'negative'): 5, ('many', 'obnoxious'): 5, ('great', 'old'): 5, ('only', 'such'): 5, ('big', 'only'): 5, ('first', 'most'): 5, ('dead', 'young'): 5, ('first', 'particular'): 5, ('more', 'tough'): 5, ('long', 'sexy'): 5, ('good', 'impossible'): 5, ('first', 'typical'): 5, ('bad', 'boring'): 5, ('better', 'boring'): 5, ('better', 'few'): 5, ('least', 'little'): 5, ('adequate', 'bad'): 5, ('bad', 'uncomprehended'): 5, ('bad', 'pastel'): 5, ('bad', 'coloured'): 5, ('bad', 'physical'): 5, ('bad', 'psychological'): 5, ('bad', 'tacky'): 5, ('bad', 'hilarious'): 5, ('bad', 'bankrupt'): 5, ('bad', 'top'): 4, ('OK', 'top'): 4, ('best', 'terrible'): 4, ('less', 'terrible'): 4, ('interesting', 'terrible'): 4, ('hard', 'same'): 4, ('bad', 'racist'): 4, ('bad', 'less'): 4, ('OK', 'bad'): 4, ('OK', 'whole'): 4, ('same', 'whole'): 4, ('same', 'worst'): 4, ('same', 'small'): 4, ('better', 'last'): 4, ('OK', 'same'): 4, ('irish', 'oirish'): 4, ('funny', 'generous'): 4, ('OK', 'old'): 4, ('good', 'mediocre'): 4, ('bad', 'worth'): 4, ('slow', 'wooden'): 4, ('great', 'own'): 4, ('complete', 'great'): 4, ('complete', 'first'): 4, ('funny', 'own'): 4, ('good', 'next'): 4, ('many', 'particular'): 4, ('many', 'much'): 4, ('familiar', 'many'): 4, ('best', 'full'): 4, ('bad', 'worthy'): 4, ('bad', 'right'): 4, ('bad', 'next'): 4, ('bleak', 'sloppy'): 4, ('-', 'black'): 4, ('better', 'sure'): 4, ('better', 'common'): 4, ('black', 'full'): 4, ('hard', 'old'): 4, ('old', 'such'): 4, ('least', 'short'): 4, ('awful', 'high'): 4, ('emotional', 'high'): 4, ('high', 'silly'): 4, ('fancy', 'high'): 4, ('black', 'high'): 4, ('high', 'poor'): 4, ('least', 'top'): 4, ('only', 'top'): 4, ('least', 'other'): 4, ('least', 'many'): 4, ('least', 'old'): 4, ('emotional', 'least'): 4, ('funny', 'least'): 4, ('first', 'least'): 4, ('fancy', 'least'): 4, ('black', 'least'): 4, ('least', 'poor'): 4, ('funny', 'whole'): 4, ('slow', 'whole'): 4, ('special', 'whole'): 4, ('right', 'slow'): 4, ('right', 'same'): 4, ('emotional', 'grotesque'): 4, ('good', 'grotesque'): 4, ('black', 'grotesque'): 4, ('emotional', 'other'): 4, ('other', 'slow'): 4, ('odd', 'other'): 4, ('much', 'other'): 4, ('emotional', 'many'): 4, ('many', 'special'): 4, ('funny', 'subtle'): 4, ('old', 'slow'): 4, ('good', 'old'): 4, ('predictable', 'slow'): 4, ('good', 'predictable'): 4, ('poor', 'predictable'): 4, ('emotional', 'slow'): 4, ('emotional', 'first'): 4, ('emotional', 'special'): 4, ('emotional', 'interested'): 4, ('average', 'emotional'): 4, ('emotional', 'fancy'): 4, ('emotional', 'much'): 4, ('emotional', 'same'): 4, ('funny', 'slow'): 4, ('boring', 'funny'): 4, ('funny', 'same'): 4, ('boring', 'slow'): 4, ('fancy', 'slow'): 4, ('poor', 'slow'): 4, ('decent', 'good'): 4, ('boring', 'special'): 4, ('single', 'special'): 4, ('fancy', 'special'): 4, ('same', 'special'): 4, ('good', 'interested'): 4, ('black', 'interested'): 4, ('boring', 'much'): 4, ('boring', 'good'): 4, ('black', 'boring'): 4, ('dull', 'good'): 4, ('black', 'single'): 4, ('good', 'silly'): 4, ('cheap', 'good'): 4, ('average', 'black'): 4, ('black', 'fancy'): 4, ('black', 'much'): 4, ('cool', 'good'): 4, ('disappointing', 'same'): 4, ('giant', 'nice'): 4, ('bad', 'nice'): 4, ('nice', 'terrible'): 4, ('nice', 'whole'): 4, ('female', 'nice'): 4, ('awful', 'extreme'): 4, ('other', 'such'): 4, ('other', 'terrible'): 4, ('basic', 'other'): 4, ('other', 'wrong'): 4, ('sure', 'whole'): 4, ('awful', 'final'): 4, ('awful', 'giant'): 4, ('bad', 'giant'): 4, ('giant', 'terrible'): 4, ('giant', 'whole'): 4, ('able', 'bad'): 4, ('bad', 'impossible'): 4, ('bad', 'wrong'): 4, ('female', 'terrible'): 4, ('terrible', 'wrong'): 4, ('female', 'whole'): 4, ('same', 'wrong'): 4, ('great', 'last'): 4, ('first', 'stupid'): 4, ('stupid', 'tough'): 4, ('single', 'stupid'): 4, ('only', 'stupid'): 4, ('stupid', 'uninspired'): 4, ('bright', 'good'): 4, ('bright', 'first'): 4, ('first', 'obvious'): 4, ('last', 'only'): 4, ('best', 'horrible'): 4, ('better', 'big'): 4, ('infamous', 'new'): 4, ('many', 'ridiculous'): 4, ('first', 'willing'): 4, ('entire', 'little'): 4, ('funny', 'horrible'): 4, ('Terrible', 'good'): 4, ('good', 'ill'): 4, ('interesting', 'main'): 4, ('cheap', 'great'): 4, ('average', 'main'): 4, ('better', 'gory'): 4, ('absurd', 'great'): 4, ('absurd', 'good'): 4, ('better', 'bitter'): 4, ('interesting', 'much'): 4, ('last', 'more'): 4, ('famous', 'last'): 4, ('awful', 'evil'): 4, ('awful', 'own'): 4, ('awful', 'interesting'): 4, ('awful', 'modern'): 4, ('awful', 'new'): 4, ('-', 'awful'): 4, ('awful', 'baroque'): 4, ('awful', 'famous'): 4, ('big', 'evil'): 4, ('big', 'black'): 4, ('big', 'modern'): 4, ('big', 'emotional'): 4, ('big', 'same'): 4, ('-', 'big'): 4, ('baroque', 'big'): 4, ('big', 'famous'): 4, ('modern', 'tenuous'): 4, ('good', 'latter'): 4, ('evil', 'fantastic'): 4, ('evil', 'ridiculous'): 4, ('more', 'ridiculous'): 4, ('bad', 'main'): 4, ('first', 'main'): 4, ('evil', 'own'): 4, ('black', 'evil'): 4, ('evil', 'modern'): 4, ('baroque', 'evil'): 4, ('evil', 'famous'): 4, ('more', 'new'): 4, ('anachronistic', 'good'): 4, ('good', 'musical'): 4, ('own', 'same'): 4, ('interesting', 'such'): 4, ('black', 'modern'): 4, ('baroque', 'black'): 4, ('black', 'famous'): 4, ('modern', 'such'): 4, ('emotional', 'modern'): 4, ('baroque', 'modern'): 4, ('famous', 'modern'): 4, ('baroque', 'such'): 4, ('great', 'such'): 4, ('famous', 'such'): 4, ('minor', 'such'): 4, ('complete', 'other'): 4, ('best', 'other'): 4, ('baroque', 'emotional'): 4, ('emotional', 'famous'): 4, ('great', 'same'): 4, ('-', 'dead'): 4, ('first', 'original'): 4, ('famous', 'first'): 4, ('baroque', 'famous'): 4, ('good', 'personal'): 4, ('criminal', 'good'): 4, ('good', 'nice'): 4, ('Nazi', 'good'): 4, ('brutal', 'good'): 4, ('awful', 'stupid'): 4, ('criminal', 'true'): 4, ('big', 'true'): 4, ('Nazi', 'true'): 4, ('criminal', 'least'): 4, ('-', 'criminal'): 4, ('Nazi', 'criminal'): 4, ('least', 'vulgar'): 4, ('Nazi', 'least'): 4, ('big', 'special'): 4, ('horrible', 'most'): 4, ('-', 'Nazi'): 4, ('more', 'poor'): 4, ('modern', 'unfunny'): 4, ('same', 'unfunny'): 4, ('horrible', 'same'): 4, ('great', 'whole'): 4, ('many', 'plain'): 4, ('absurd', 'more'): 4, ('common', 'many'): 4, ('hotter', 'many'): 4, ('absurdist', 'many'): 4, ('fantastical', 'many'): 4, ('many', 'skeptical'): 4, ('critical', 'many'): 4, ('bitter', 'many'): 4, ('many', 'unaired'): 4, ('correct', 'many'): 4, ('many', 'psychotic'): 4, ('better', 'negative'): 4, ('good', 'negative'): 4, ('Other', 'same'): 4, ('repetitive', 'same'): 4, ('first', 'long'): 4, ('popular', 'special'): 4, ('ridiculous', 'special'): 4, ('bad', 'least'): 4, ('bad', 'yellow'): 4, ('bad', 'poor'): 4, ('funny', 'new'): 4, ('funny', 'worst'): 4, ('same', 'yellow'): 4, ('least', 'red'): 4, ('least', 'yellow'): 4, ('last', 'least'): 4, ('slow', 'yellow'): 4, ('poor', 'young'): 4, ('dead', 'poor'): 4, ('first', 'full'): 4, ('possible', 'terrible'): 4, ('sexy', 'tragic'): 4, ('high', 'sexy'): 4, ('silly', 'small'): 4, ('Sunny', 'small'): 4, ('small', 'wounded'): 4, ('medical', 'small'): 4, ('much', 'sexy'): 4, ('much', 'sure'): 4, ('sexy', 'tough'): 4, ('Sunny', 'silly'): 4, ('silly', 'wounded'): 4, ('medical', 'silly'): 4, ('modern', 'sexy'): 4, ('femme', 'sexy'): 4, ('fatal', 'sexy'): 4, ('bad', 'sexy'): 4, ('sexy', 'sure'): 4, ('good', 'sexy'): 4, ('fresh', 'sexy'): 4, ('little', 'sexy'): 4, ('poor', 'sexy'): 4, ('classic', 'sexy'): 4, ('Double', 'sexy'): 4, ('bright', 'sexy'): 4, ('devious', 'sexy'): 4, ('particular', 'sexy'): 4, ('compelling', 'sexy'): 4, ('same', 'sexy'): 4, ('rural', 'sexy'): 4, ('implausible', 'sexy'): 4, ('ridiculous', 'sexy'): 4, ('sad', 'sexy'): 4, ('sexy', 'stereotypical'): 4, ('Hispanic', 'sexy'): 4, ('sexy', 'unintentional'): 4, ('red', 'sexy'): 4, ('Sunny', 'wounded'): 4, ('Sunny', 'medical'): 4, ('medical', 'wounded'): 4, ('good', 'sure'): 4, ('more', 'particular'): 4, ('beautiful', 'much'): 4, ('much', 'nice'): 4, ('Mediterrenean', 'beautiful'): 4, ('beautiful', 'lovely'): 4, ('beautiful', 'romantic'): 4, ('beautiful', 'convincing'): 4, ('beautiful', 'egomaniac'): 4, ('beautiful', 'bent'): 4, ('beautiful', 'vulnerable'): 4, ('beautiful', 'familiar'): 4, ('beautiful', 'macho'): 4, ('beautiful', 'obnoxious'): 4, ('beautiful', 'difficult'): 4, ('beautiful', 'central'): 4, ('beautiful', 'mediocre'): 4, ('azure', 'female'): 4, ('convincing', 'many'): 4, ('little', 'other'): 4, ('little', 'new'): 4, ('little', 'most'): 4, ('catholic', 'other'): 4, ('catholic', 'young'): 4, ('catholic', 'good'): 4, ('catholic', 'tough'): 4, ('other', 'young'): 4, ('other', 'tough'): 4, ('most', 'other'): 4, ('better', 'new'): 4, ('hard', 'young'): 4, ('hard', 'impossible'): 4, ('more', 'young'): 4, ('tough', 'young'): 4, ('same', 'young'): 4, ('broad', 'good'): 4, ('-', 'terrible'): 4, ('funny', 'smart'): 4, ('bad', 'typical'): 4, ('bad', 'full'): 4, ('bad', 'close'): 4, ('bad', 'technical'): 4, ('close', 'good'): 4, ('little', 'typical'): 4, ('many', 'typical'): 4, ('many', 'sure'): 4, ('last', 'typical'): 4, ('clear', 'first'): 4, ('much', 'successful'): 4, ('vampire', 'whole'): 4, ('few', 'other'): 4, ('angry', 'young'): 4, ('unrelenting', 'young'): 4, ('closest', 'young'): 4, ('rundown', 'young'): 4, ('usual', 'young'): 4, ('personal', 'young'): 4, ('entrenched', 'young'): 4, ('older', 'young'): 4, ('experienced', 'young'): 4, ('dramatic', 'young'): 4, ('overall', 'young'): 4, ('complex', 'young'): 4, ('only', 'worst'): 4, ('few', 'funny'): 4, ('boring', 'clear'): 4, ('boring', 'friendly'): 4, ('clear', 'friendly'): 4, ('big', 'few'): 4, ('corrupt', 'good'): 4, ('double', 'many'): 4, ('corrupt', 'double'): 4, ('corrupt', 'many'): 4, ('Other', 'honorable'): 4, ('beggar', 'honorable'): 4, ('honorable', 'hungry'): 4, ('Other', 'beggar'): 4, ('Other', 'hungry'): 4, ('beggar', 'hungry'): 4, ('next', 'same'): 4, ('bad', 'weird'): 4, ('only', 'terrible'): 4, ('professional', 'worst'): 4, ('good', 'professional'): 4, ('least', 'professional'): 4, ('hard', 'professional'): 4, ('constant', 'good'): 4, ('hard', 'least'): 4, ('good', 'pure'): 4, ('basic', 'big'): 4, ('basic', 'little'): 4, ('basic', 'much'): 4, ('basic', 'least'): 4, ('convincing', 'interesting'): 4, ('convincing', 'first'): 4, ('convincing', 'particular'): 4, ('convincing', 'sure'): 4, ('hard', 'main'): 4, ('Spanish', 'pretty'): 4, ('main', 'middle'): 4, ('main', 'popular'): 4, ('certain', 'main'): 4, ('main', 'ready'): 4, ('main', 'mixed'): 4, ('gratuitous', 'main'): 4, ('main', 'vital'): 4, ('impetuous', 'main'): 4, ('boorish', 'main'): 4, ('main', 'military'): 4, ...})
In [7]:
# 上位30件の共起ペアを抽出
top_positive_pairs = cooccurrence_positive.most_common(30)
top_negative_pairs = cooccurrence_negative.most_common(30)
In [ ]:
import csv
from pyvis.network import Network
from collections import defaultdict
def visualize_cooccurrence_with_node_size(top_pairs, tokens_list, title):
"""
共起ペアをネットワークとして可視化し、ノードとエッジをCSVに出力する関数。
ノードのサイズは単語の出現回数に基づいて設定。
"""
net = Network(height="750px", width="100%", notebook=True)
net.force_atlas_2based() # レイアウト設定
# 単語の出現回数を計算
word_counts = defaultdict(int)
for tokens in tokens_list:
for token in tokens:
word_counts[token] += 1
nodes = set() # ノードを管理するセット
edges = [] # エッジを管理するリスト
# ノードとエッジを追加
for (word1, word2), weight in top_pairs:
# ノードを追加(重複を避ける)
if word1 not in nodes:
net.add_node(word1, label=word1, size=word_counts[word1])
nodes.add(word1)
if word2 not in nodes:
net.add_node(word2, label=word2, size=word_counts[word2])
nodes.add(word2)
# エッジを追加
edges.append((word1, word2, weight))
net.add_edge(word1, word2, value=weight)
# ノードをCSVに出力
nodes_csv = f"nodes_{title}.csv"
with open(nodes_csv, mode='w', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
writer.writerow(['word', 'size']) # ヘッダー
for node in nodes:
writer.writerow([node, word_counts[node]]) # ノードを保存
print(f"Nodes saved as {nodes_csv}")
# エッジをCSVに出力
edges_csv = f"edges_{title}.csv"
with open(edges_csv, mode='w', newline='', encoding='utf-8') as file:
writer = csv.writer(file)
writer.writerow(['word1', 'word2', 'weight']) # ヘッダー
writer.writerows(edges) # エッジを保存
print(f"Edges saved as {edges_csv}")
# HTMLファイルとして保存して表示
output_file = f"{title}.html"
net.show(output_file)
print(f"Visualization saved as {output_file}")
In [25]:
# ポジティブレビューの共起ネットワークを可視化
visualize_cooccurrence_with_node_size(
top_positive_pairs,
review_positive['tokens'],
"positive_cooccurrence_adjectives"
)
Warning: When cdn_resources is 'local' jupyter notebook has issues displaying graphics on chrome/safari. Use cdn_resources='in_line' or cdn_resources='remote' if you have issues viewing graphics in a notebook. Nodes saved as nodes_positive_cooccurrence_adjectives.csv Edges saved as edges_positive_cooccurrence_adjectives.csv positive_cooccurrence_adjectives.html Visualization saved as positive_cooccurrence_adjectives.html
In [27]:
# ネガティブレビューの共起ネットワークを可視化
visualize_cooccurrence_with_node_size(
top_negative_pairs,
review_negative['tokens'],
"negative_cooccurrence_adjectives"
)
Warning: When cdn_resources is 'local' jupyter notebook has issues displaying graphics on chrome/safari. Use cdn_resources='in_line' or cdn_resources='remote' if you have issues viewing graphics in a notebook. Nodes saved as nodes_negative_cooccurrence_adjectives.csv Edges saved as edges_negative_cooccurrence_adjectives.csv negative_cooccurrence_adjectives.html Visualization saved as negative_cooccurrence_adjectives.html
In [ ]: