In [1]:
# 必要なライブラリをインポート
import pandas as pd
import re
from itertools import combinations
from collections import Counter
from pyvis.network import Network
import spacy

# spaCyモデルのロード
nlp = spacy.load("en_core_web_sm")

# 動詞だけを抽出する関数
def extract_verbs(text):
    doc = nlp(text)
    return [token.text for token in doc if token.pos_ == "VERB"]

# 形容詞だけを抽出する関数
def extract_adjectives(text):
    doc = nlp(text)
    return [token.text for token in doc if token.pos_ == "ADJ"]

# IMDbデータセットを読み込む
data = pd.read_csv("IMDB Dataset.csv")
data.head()
Out[1]:
review sentiment
0 One of the other reviewers has mentioned that ... positive
1 A wonderful little production. <br /><br />The... positive
2 I thought this was a wonderful way to spend ti... positive
3 Basically there's a family where a little boy ... negative
4 Petter Mattei's "Love in the Time of Money" is... positive
In [2]:
# ポジティブレビューとネガティブレビューのサンプリング
review_positive = data[data['sentiment'] == 'positive'].sample(n=100, random_state=50)
review_negative = data[data['sentiment'] == 'negative'].sample(n=100, random_state=50)
In [3]:
# ポジティブレビューとネガティブレビューの動詞抽出
#review_positive['tokens'] = review_positive['review'].apply(extract_verbs)
#review_negative['tokens'] = review_negative['review'].apply(extract_verbs)

# ポジティブレビューとネガティブレビューの形容詞抽出
review_positive['tokens'] = review_positive['review'].apply(extract_adjectives)
review_negative['tokens'] = review_negative['review'].apply(extract_adjectives)
In [4]:
# 共起ペア生成関数(順序を無視してカウント)
def generate_cooccurrence(tokens_list):
    """
    トークンリストから順序を無視した共起ペアを生成し、頻度をカウント
    """
    pairs = []
    for tokens in tokens_list:
        # 自己ペアを排除し、ソートしたペアを生成
        pairs.extend(tuple(sorted((a, b))) for a, b in combinations(tokens, 2) if a != b)
    return Counter(pairs)
In [5]:
# ポジティブレビューの共起ペア生成
cooccurrence_positive = generate_cooccurrence(review_positive['tokens'])
cooccurrence_positive
Out[5]:
Counter({('good', 'great'): 38,
         ('good', 'little'): 30,
         ('good', 'other'): 29,
         ('little', 'other'): 23,
         ('great', 'other'): 23,
         ('best', 'young'): 23,
         ('good', 'many'): 22,
         ('best', 'great'): 22,
         ('best', 'old'): 22,
         ('little', 'more'): 20,
         ('best', 'same'): 20,
         ('old', 'young'): 20,
         ('best', 'many'): 19,
         ('other', 'same'): 19,
         ('good', 'own'): 18,
         ('first', 'other'): 18,
         ('many', 'other'): 17,
         ('many', 'most'): 16,
         ('few', 'other'): 16,
         ('many', 'young'): 16,
         ('big', 'silent'): 16,
         ('better', 'good'): 15,
         ('little', 'own'): 15,
         ('many', 'same'): 15,
         ('best', 'new'): 15,
         ('best', 'other'): 15,
         ('good', 'nice'): 15,
         ('good', 'more'): 15,
         ('good', 'low'): 15,
         ('funny', 'good'): 15,
         ('many', 'southern'): 15,
         ('most', 'southern'): 15,
         ('real', 'silent'): 15,
         ('short', 'silent'): 15,
         ('little', 'many'): 14,
         ('little', 'most'): 14,
         ('best', 'little'): 14,
         ('great', 'new'): 14,
         ('few', 'good'): 14,
         ('first', 'good'): 14,
         ('little', 'such'): 14,
         ('big', 'great'): 14,
         ('most', 'other'): 14,
         ('good', 'such'): 14,
         ('best', 'good'): 14,
         ('first', 'little'): 14,
         ('much', 'other'): 13,
         ('little', 'same'): 13,
         ('new', 'same'): 13,
         ('few', 'great'): 13,
         ('brilliant', 'other'): 13,
         ('great', 'many'): 13,
         ('more', 'other'): 13,
         ('beautiful', 'great'): 13,
         ('little', 'low'): 13,
         ('older', 'younger'): 12,
         ('great', 'old'): 12,
         ('new', 'other'): 12,
         ('good', 'only'): 12,
         ('great', 'little'): 12,
         ('good', 'most'): 12,
         ('full', 'other'): 12,
         ('other', 'strong'): 12,
         ('great', 'same'): 12,
         ('great', 'real'): 12,
         ('good', 'much'): 11,
         ('little', 'much'): 11,
         ('last', 'other'): 11,
         ('many', 'more'): 11,
         ('-', 'many'): 11,
         ('fantastic', 'same'): 11,
         ('many', 'own'): 11,
         ('own', 'same'): 11,
         ('great', 'silent'): 11,
         ('most', 'young'): 11,
         ('big', 'little'): 11,
         ('good', 'real'): 11,
         ('funny', 'own'): 11,
         ('great', 'perfect'): 11,
         ('good', 'same'): 11,
         ('many', 'old'): 11,
         ('first', 'same'): 11,
         ('better', 'other'): 10,
         ('little', 'real'): 10,
         ('many', 'new'): 10,
         ('best', 'most'): 10,
         ('great', 'own'): 10,
         ('other', 'own'): 10,
         ('good', 'several'): 10,
         ('great', 'strong'): 10,
         ('little', 'true'): 10,
         ('few', 'many'): 10,
         ('only', 'other'): 10,
         ('great', 'most'): 10,
         ('other', 'real'): 10,
         ('funny', 'great'): 10,
         ('better', 'great'): 10,
         ('best', 'strong'): 10,
         ('big', 'short'): 10,
         ('big', 'real'): 10,
         ('entertaining', 'good'): 10,
         ('original', 'other'): 10,
         ('intelligent', 'southern'): 10,
         ('other', 'southern'): 10,
         ('southern', 'young'): 10,
         ('southern', 'strong'): 10,
         ('silent', 'sound'): 10,
         ('silent', 'such'): 10,
         ('greatest', 'silent'): 10,
         ('last', 'little'): 9,
         ('many', 'much'): 9,
         ('dead', 'own'): 9,
         ('purgatory', 'same'): 9,
         ('new', 'purgatory'): 9,
         ('impossible', 'purgatory'): 9,
         ('impossible', 'same'): 9,
         ('best', 'own'): 9,
         ('impossible', 'new'): 9,
         ('new', 'real'): 9,
         ('best', 'real'): 9,
         ('old', 'own'): 9,
         ('good', 'old'): 9,
         ('other', 'young'): 9,
         ('great', 'such'): 9,
         ('good', 'least'): 9,
         ('first', 'full'): 9,
         ('great', 'only'): 9,
         ('great', 'long'): 9,
         ('enough', 'great'): 9,
         ('other', 'realistic'): 9,
         ('old', 'same'): 9,
         ('full', 'little'): 9,
         ('few', 'same'): 9,
         ('great', 'original'): 9,
         ('real', 'short'): 9,
         ('good', 'last'): 8,
         ('least', 'more'): 8,
         ('more', 'same'): 8,
         ('more', 'own'): 8,
         ('more', 'new'): 8,
         ('European', 'little'): 8,
         ('much', 'same'): 8,
         ('most', 'same'): 8,
         ('real', 'same'): 8,
         ('new', 'own'): 8,
         ('American', 'young'): 8,
         ('few', 'first'): 8,
         ('good', 'young'): 8,
         ('many', 'strong'): 8,
         ('few', 'little'): 8,
         ('many', 'true'): 8,
         ('enough', 'few'): 8,
         ('little', 'main'): 8,
         ('big', 'such'): 8,
         ('big', 'most'): 8,
         ('bad', 'good'): 8,
         ('different', 'good'): 8,
         ('funny', 'little'): 8,
         ('same', 'young'): 8,
         ('brilliant', 'same'): 8,
         ('best', 'big'): 8,
         ('great', 'wonderful'): 8,
         ('best', 'better'): 8,
         ('new', 'strong'): 8,
         ('amazing', 'other'): 8,
         ('best', 'long'): 8,
         ('beautiful', 'best'): 8,
         ('best', 'dark'): 8,
         ('amazing', 'great'): 8,
         ('good', 'high'): 8,
         ('innocent', 'little'): 8,
         ('amusing', 'good'): 8,
         ('long', 'most'): 8,
         ('most', 'old'): 8,
         ('bad', 'better'): 8,
         ('short', 'such'): 8,
         ('better', 'much'): 7,
         ('more', 'much'): 7,
         ('best', 'more'): 7,
         ('-', 'little'): 7,
         ('close', 'little'): 7,
         ('dead', 'little'): 7,
         ('least', 'same'): 7,
         ('much', 'own'): 7,
         ('much', 'new'): 7,
         ('most', 'own'): 7,
         ('best', 'clean'): 7,
         ('American', 'old'): 7,
         ('old', 'other'): 7,
         ('own', 'young'): 7,
         ('first', 'own'): 7,
         ('important', 'most'): 7,
         ('good', 'memorable'): 7,
         ('few', 'only'): 7,
         ('little', 'sure'): 7,
         ('little', 'only'): 7,
         ('other', 'sure'): 7,
         ('many', 'such'): 7,
         ('bad', 'many'): 7,
         ('American', 'many'): 7,
         ('big', 'good'): 7,
         ('bad', 'most'): 7,
         ('few', 'most'): 7,
         ('different', 'other'): 7,
         ('great', 'short'): 7,
         ('better', 'many'): 7,
         ('other', 'various'): 7,
         ('best', 'true'): 7,
         ('same', 'various'): 7,
         ('same', 'true'): 7,
         ('great', 'true'): 7,
         ('charming', 'little'): 7,
         ('little', 'short'): 7,
         ('nice', 'short'): 7,
         ('low', 'other'): 7,
         ('good', 'interesting'): 7,
         ('good', 'true'): 7,
         ('better', 'entertaining'): 7,
         ('great', 'realistic'): 7,
         ('hot', 'nice'): 7,
         ('long', 'old'): 7,
         ('beautiful', 'many'): 7,
         ('fantastic', 'other'): 7,
         ('good', 'wild'): 7,
         ('effective', 'good'): 7,
         ('original', 'same'): 7,
         ('fantastic', 'original'): 7,
         ('full', 'same'): 7,
         ('high', 'other'): 7,
         ('good', 'special'): 7,
         ('most', 'strong'): 7,
         ('most', 'short'): 7,
         ('best', 'comic'): 7,
         ('comic', 'little'): 7,
         ('many', 'serious'): 7,
         ('new', 'realistic'): 7,
         ('anti', 'little'): 7,
         ('amazing', 'realistic'): 7,
         ('last', 'much'): 6,
         ('classic', 'much'): 6,
         ('classic', 'good'): 6,
         ('better', 'little'): 6,
         ('-', 'more'): 6,
         ('close', 'more'): 6,
         ('entertaining', 'many'): 6,
         ('little', 'new'): 6,
         ('European', 'many'): 6,
         ('dead', 'many'): 6,
         ('many', 'purgatory'): 6,
         ('impossible', 'many'): 6,
         ('best', 'close'): 6,
         ('dead', 'purgatory'): 6,
         ('dead', 'same'): 6,
         ('dead', 'new'): 6,
         ('dead', 'impossible'): 6,
         ('own', 'purgatory'): 6,
         ('best', 'purgatory'): 6,
         ('general', 'same'): 6,
         ('general', 'own'): 6,
         ('impossible', 'own'): 6,
         ('best', 'general'): 6,
         ('best', 'impossible'): 6,
         ('old', 'silent'): 6,
         ('good', 'new'): 6,
         ('good', 'pretty'): 6,
         ('brilliant', 'many'): 6,
         ('amazing', 'many'): 6,
         ('able', 'many'): 6,
         ('many', 'non'): 6,
         ('many', 'sure'): 6,
         ('many', 'sensitive'): 6,
         ('many', 'only'): 6,
         ('other', 'true'): 6,
         ('brilliant', 'little'): 6,
         ('famous', 'little'): 6,
         ('important', 'many'): 6,
         ('big', 'famous'): 6,
         ('important', 'other'): 6,
         ('bad', 'other'): 6,
         ('able', 'famous'): 6,
         ('own', 'such'): 6,
         ('classic', 'famous'): 6,
         ('bad', 'great'): 6,
         ('famous', 'much'): 6,
         ('great', 'local'): 6,
         ('great', 'last'): 6,
         ('American', 'most'): 6,
         ('little', 'young'): 6,
         ('funny', 'more'): 6,
         ('many', 'subtle'): 6,
         ('corrupt', 'ordinary'): 6,
         ('Estonian', 'ordinary'): 6,
         ('better', 'first'): 6,
         ('entire', 'real'): 6,
         ('first', 'real'): 6,
         ('entire', 'good'): 6,
         ('great', 'more'): 6,
         ('better', 'same'): 6,
         ('better', 'new'): 6,
         ('beautiful', 'strong'): 6,
         ('best', 'emotional'): 6,
         ('beautiful', 'true'): 6,
         ('new', 'true'): 6,
         ('easy', 'good'): 6,
         ('only', 'own'): 6,
         ('most', 'only'): 6,
         ('more', 'such'): 6,
         ('high', 'own'): 6,
         ('low', 'own'): 6,
         ('funny', 'such'): 6,
         ('good', 'innocent'): 6,
         ('good', 'wonderful'): 6,
         ('dramatic', 'good'): 6,
         ('new', 'original'): 6,
         ('great', 'worse'): 6,
         ('beautiful', 'good'): 6,
         ('long', 'many'): 6,
         ('big', 'right'): 6,
         ('only', 'strong'): 6,
         ('first', 'more'): 6,
         ('brilliant', 'original'): 6,
         ('high', 'same'): 6,
         ('intelligent', 'many'): 6,
         ('intelligent', 'most'): 6,
         ('realistic', 'strong'): 6,
         ('American', 'best'): 6,
         ('funny', 'perfect'): 6,
         ('good', 'perfect'): 6,
         ('only', 'real'): 6,
         ('greatest', 'old'): 6,
         ('best', 'greatest'): 6,
         ('comic', 'good'): 6,
         ('complex', 'little'): 6,
         ('average', 'good'): 6,
         ('good', 'ready'): 6,
         ('little', 'realistic'): 6,
         ('difficult', 'little'): 6,
         ('big', 'sound'): 6,
         ('big', 'greatest'): 6,
         ('silent', 'special'): 6,
         ('real', 'sound'): 6,
         ('short', 'sound'): 6,
         ('real', 'such'): 6,
         ('greatest', 'real'): 6,
         ('greatest', 'short'): 6,
         ('half', 'little'): 6,
         ('comic', 'nice'): 6,
         ('married', 'nice'): 6,
         ('desperate', 'new'): 6,
         ('dated', 'new'): 6,
         ('better', 'cheap'): 6,
         ('much', 'older'): 5,
         ('last', 'older'): 5,
         ('last', 'younger'): 5,
         ('classic', 'little'): 5,
         ('dead', 'more'): 5,
         ('more', 'most'): 5,
         ('more', 'real'): 5,
         ('more', 'ready'): 5,
         ('entertaining', 'same'): 5,
         ('-', 'same'): 5,
         ('general', 'modern'): 5,
         ('initial', 'little'): 5,
         ('little', 'sexual'): 5,
         ('little', 'ready'): 5,
         ('least', 'many'): 5,
         ('least', 'own'): 5,
         ('many', 'religious'): 5,
         ('fantastic', 'own'): 5,
         ('European', 'same'): 5,
         ('close', 'many'): 5,
         ('many', 'single'): 5,
         ('many', 'ready'): 5,
         ('close', 'same'): 5,
         ('mere', 'own'): 5,
         ('clean', 'same'): 5,
         ('same', 'single'): 5,
         ('general', 'new'): 5,
         ('general', 'old'): 5,
         ('few', 'old'): 5,
         ('American', 'great'): 5,
         ('American', 'other'): 5,
         ('American', 'good'): 5,
         ('great', 'young'): 5,
         ('first', 'great'): 5,
         ('general', 'other'): 5,
         ('general', 'young'): 5,
         ('other', 'worth'): 5,
         ('important', 'young'): 5,
         ('great', 'similar'): 5,
         ('able', 'good'): 5,
         ('alive', 'good'): 5,
         ('able', 'few'): 5,
         ('clear', 'little'): 5,
         ('little', 'sweet'): 5,
         ('historical', 'many'): 5,
         ('few', 'much'): 5,
         ('European', 'other'): 5,
         ('more', 'only'): 5,
         ('other', 'physical'): 5,
         ('bad', 'little'): 5,
         ('American', 'little'): 5,
         ('big', 'own'): 5,
         ('bad', 'big'): 5,
         ('main', 'other'): 5,
         ('able', 'great'): 5,
         ('most', 'such'): 5,
         ('brilliant', 'much'): 5,
         ('brilliant', 'last'): 5,
         ('bad', 'much'): 5,
         ('good', 'main'): 5,
         ('female', 'first'): 5,
         ('different', 'strong'): 5,
         ('little', 'next'): 5,
         ('little', 'obvious'): 5,
         ('incredible', 'same'): 5,
         ('brilliant', 'more'): 5,
         ('favorite', 'funny'): 5,
         ('funny', 'same'): 5,
         ('same', 'sure'): 5,
         ('dirty', 'young'): 5,
         ('best', 'dirty'): 5,
         ('different', 'most'): 5,
         ('interesting', 'short'): 5,
         ('better', 'entire'): 5,
         ('better', 'strong'): 5,
         ('important', 'strong'): 5,
         ('difficult', 'real'): 5,
         ('real', 'strong'): 5,
         ('many', 'simple'): 5,
         ('other', 'wonderful'): 5,
         ('funny', 'other'): 5,
         ('best', 'perfect'): 5,
         ('great', 'serious'): 5,
         ('long', 'other'): 5,
         ('beautiful', 'other'): 5,
         ('best', 'important'): 5,
         ('amazing', 'best'): 5,
         ('long', 'same'): 5,
         ('bold', 'great'): 5,
         ('beautiful', 'same'): 5,
         ('great', 'technical'): 5,
         ('charming', 'good'): 5,
         ('easy', 'little'): 5,
         ('low', 'more'): 5,
         ('innocent', 'more'): 5,
         ('high', 'little'): 5,
         ('charismatic', 'own'): 5,
         ('good', 'romantic'): 5,
         ('innocent', 'such'): 5,
         ('greatest', 'such'): 5,
         ('dead', 'good'): 5,
         ('little', 'smart'): 5,
         ('better', 'funny'): 5,
         ('bad', 'same'): 5,
         ('fresh', 'good'): 5,
         ('great', 'high'): 5,
         ('many', 'social'): 5,
         ('few', 'high'): 5,
         ('other', 'silly'): 5,
         ('first', 'nice'): 5,
         ('many', 'surprised'): 5,
         ('bad', 'old'): 5,
         ('absolute', 'other'): 5,
         ('good', 'worse'): 5,
         ('bad', 'fantastic'): 5,
         ('first', 'low'): 5,
         ('old', 'right'): 5,
         ('little', 'right'): 5,
         ('funny', 'old'): 5,
         ('big', 'old'): 5,
         ('brilliant', 'first'): 5,
         ('last', 'original'): 5,
         ('other', 'pure'): 5,
         ('great', 'top'): 5,
         ('sensitive', 'southern'): 5,
         ('social', 'southern'): 5,
         ('civil', 'southern'): 5,
         ('southern', 'subtle'): 5,
         ('realistic', 'southern'): 5,
         ('inside', 'southern'): 5,
         ('non', 'southern'): 5,
         ('-', 'southern'): 5,
         ('slave', 'southern'): 5,
         ('common', 'southern'): 5,
         ('dull', 'southern'): 5,
         ('historical', 'southern'): 5,
         ('simple', 'southern'): 5,
         ('confederate', 'southern'): 5,
         ('brilliant', 'southern'): 5,
         ('important', 'southern'): 5,
         ('southern', 'third'): 5,
         ('liberal', 'southern'): 5,
         ('skilled', 'southern'): 5,
         ('modest', 'southern'): 5,
         ('close', 'southern'): 5,
         ('southern', 'tough'): 5,
         ('drunk', 'southern'): 5,
         ('negligent', 'southern'): 5,
         ('inseparable', 'southern'): 5,
         ('best', 'southern'): 5,
         ('different', 'southern'): 5,
         ('long', 'southern'): 5,
         ('intense', 'southern'): 5,
         ('passionate', 'southern'): 5,
         ('only', 'southern'): 5,
         ('pure', 'southern'): 5,
         ('same', 'southern'): 5,
         ('old', 'southern'): 5,
         ('powerful', 'southern'): 5,
         ('breathtaking', 'southern'): 5,
         ('beautiful', 'southern'): 5,
         ('Appalachian', 'southern'): 5,
         ('broad', 'southern'): 5,
         ('educational', 'southern'): 5,
         ('middle', 'southern'): 5,
         ('lower', 'southern'): 5,
         ('short', 'southern'): 5,
         ('American', 'southern'): 5,
         ('few', 'southern'): 5,
         ('gifted', 'southern'): 5,
         ('southern', 'vocal'): 5,
         ('outstanding', 'southern'): 5,
         ('most', 'third'): 5,
         ('most', 'outstanding'): 5,
         ('long', 'short'): 5,
         ('best', 'original'): 5,
         ('best', 'funny'): 5,
         ('enough', 'same'): 5,
         ('good', 'whole'): 5,
         ('crazy', 'good'): 5,
         ('clean', 'old'): 5,
         ('cinematic', 'old'): 5,
         ('fresh', 'old'): 5,
         ('dark', 'old'): 5,
         ('personal', 'young'): 5,
         ('comic', 'young'): 5,
         ('best', 'cinematic'): 5,
         ('best', 'fresh'): 5,
         ('better', 'worth'): 5,
         ('bad', 'worth'): 5,
         ('bad', 'sure'): 5,
         ('hard', 'little'): 5,
         ('good', 'sweet'): 5,
         ('good', 'preachy'): 5,
         ('nice', 'such'): 5,
         ('amazing', 'good'): 5,
         ('best', 'such'): 5,
         ('old', 'perfect'): 5,
         ('old', 'such'): 5,
         ('best', 'first'): 5,
         ('first', 'worth'): 5,
         ('direct', 'silent'): 5,
         ('1st', 'silent'): 5,
         ('early', 'silent'): 5,
         ('little', 'silent'): 5,
         ('silent', 'top'): 5,
         ('familiar', 'silent'): 5,
         ('Later', 'silent'): 5,
         ('silent', 'similar'): 5,
         ('glorious', 'silent'): 5,
         ('pristine', 'silent'): 5,
         ('silent', 'uphill'): 5,
         ('positive', 'silent'): 5,
         ('double', 'silent'): 5,
         ('silent', 'technical'): 5,
         ('most', 'silent'): 5,
         ('fortunate', 'silent'): 5,
         ('instrumental', 'silent'): 5,
         ('best', 'silent'): 5,
         ('difficult', 'silent'): 5,
         ('entire', 'silent'): 5,
         ('silent', 'wise'): 5,
         ('selective', 'silent'): 5,
         ('next', 'silent'): 5,
         ('silent', 'subject'): 5,
         ('right', 'silent'): 5,
         ('popular', 'silent'): 5,
         ('Punctured', 'silent'): 5,
         ('concrete', 'silent'): 5,
         ('long', 'silent'): 5,
         ('silent', 'silver'): 5,
         ('silent', 'subdued'): 5,
         ('early', 'short'): 5,
         ('female', 'little'): 5,
         ('best', 'low'): 5,
         ('much', 'younger'): 4,
         ('better', 'last'): 4,
         ('older', 'other'): 4,
         ('favourite', 'good'): 4,
         ('better', 'classic'): 4,
         ('entertaining', 'more'): 4,
         ('European', 'more'): 4,
         ('general', 'more'): 4,
         ('entertaining', 'new'): 4,
         ('best', 'entertaining'): 4,
         ('many', 'rich'): 4,
         ('rich', 'same'): 4,
         ('-', 'most'): 4,
         ('-', 'new'): 4,
         ('modern', 'own'): 4,
         ('fantastic', 'little'): 4,
         ('impossible', 'little'): 4,
         ('fantastic', 'least'): 4,
         ('least', 'new'): 4,
         ('least', 'real'): 4,
         ('new', 'religious'): 4,
         ('best', 'religious'): 4,
         ('human', 'same'): 4,
         ('fantastic', 'much'): 4,
         ('dead', 'much'): 4,
         ('most', 'much'): 4,
         ('best', 'much'): 4,
         ('fantastic', 'many'): 4,
         ('fantastic', 'new'): 4,
         ('best', 'fantastic'): 4,
         ('contemporary', 'same'): 4,
         ('European', 'best'): 4,
         ('many', 'predictable'): 4,
         ('many', 'possible'): 4,
         ('many', 'real'): 4,
         ('close', 'most'): 4,
         ('close', 'new'): 4,
         ('own', 'possible'): 4,
         ('new', 'possible'): 4,
         ('best', 'possible'): 4,
         ('best', 'dead'): 4,
         ('dead', 'real'): 4,
         ('initial', 'same'): 4,
         ('same', 'sexual'): 4,
         ('less', 'same'): 4,
         ('most', 'new'): 4,
         ('impossible', 'most'): 4,
         ('most', 'real'): 4,
         ('huge', 'own'): 4,
         ('own', 'real'): 4,
         ('own', 'single'): 4,
         ('huge', 'new'): 4,
         ('general', 'huge'): 4,
         ('new', 'ready'): 4,
         ('best', 'initial'): 4,
         ('best', 'sexual'): 4,
         ('best', 'less'): 4,
         ('real', 'sexual'): 4,
         ('American', 'few'): 4,
         ('American', 'own'): 4,
         ('general', 'great'): 4,
         ('few', 'general'): 4,
         ('first', 'general'): 4,
         ('few', 'own'): 4,
         ('few', 'young'): 4,
         ('handsome', 'own'): 4,
         ('other', 'several'): 4,
         ('major', 'other'): 4,
         ('good', 'handsome'): 4,
         ('brilliant', 'own'): 4,
         ('bad', 'best'): 4,
         ('low', 'young'): 4,
         ('surprised', 'young'): 4,
         ('brilliant', 'great'): 4,
         ('good', 'strong'): 4,
         ('happy', 'many'): 4,
         ('linear', 'little'): 4,
         ('confusing', 'little'): 4,
         ('enough', 'little'): 4,
         ('little', 'loud'): 4,
         ('many', 'slow'): 4,
         ('linear', 'many'): 4,
         ('confusing', 'many'): 4,
         ('clear', 'many'): 4,
         ('interested', 'many'): 4,
         ('major', 'many'): 4,
         ('few', 'more'): 4,
         ('few', 'sure'): 4,
         ('-', 'non'): 4,
         ('non', 'other'): 4,
         ('-', 'other'): 4,
         ('confusing', 'linear'): 4,
         ('clear', 'linear'): 4,
         ('linear', 'sure'): 4,
         ('European', 'sure'): 4,
         ('more', 'true'): 4,
         ('clear', 'confusing'): 4,
         ('confusing', 'sure'): 4,
         ('clear', 'sure'): 4,
         ('much', 'sure'): 4,
         ('more', 'sure'): 4,
         ('sure', 'sweet'): 4,
         ('enough', 'other'): 4,
         ('other', 'sensitive'): 4,
         ('able', 'little'): 4,
         ('big', 'many'): 4,
         ('gifted', 'many'): 4,
         ('local', 'many'): 4,
         ('big', 'other'): 4,
         ('able', 'big'): 4,
         ('big', 'important'): 4,
         ('big', 'classic'): 4,
         ('big', 'main'): 4,
         ('big', 'much'): 4,
         ('able', 'other'): 4,
         ('able', 'own'): 4,
         ('able', 'classic'): 4,
         ('able', 'much'): 4,
         ('gifted', 'most'): 4,
         ('great', 'important'): 4,
         ('brilliant', 'few'): 4,
         ('brilliant', 'most'): 4,
         ('classic', 'great'): 4,
         ('great', 'much'): 4,
         ('few', 'local'): 4,
         ('good', 'local'): 4,
         ('sensual', 'younger'): 4,
         ('dark', 'good'): 4,
         ('female', 'good'): 4,
         ('good', 'sexual'): 4,
         ('enough', 'good'): 4,
         ('first', 'sexual'): 4,
         ('more', 'young'): 4,
         ('American', 'same'): 4,
         ('funny', 'sad'): 4,
         ('funny', 'many'): 4,
         ('obvious', 'same'): 4,
         ('comedic', 'great'): 4,
         ('many', 'mighty'): 4,
         ('new', 'numerous'): 4,
         ('Estonian', 'corrupt'): 4,
         ('different', 'many'): 4,
         ('different', 'new'): 4,
         ('beautiful', 'excellent'): 4,
         ('excellent', 'great'): 4,
         ('great', 'strange'): 4,
         ('beautiful', 'own'): 4,
         ('least', 'other'): 4,
         ('better', 'real'): 4,
         ('better', 'subtle'): 4,
         ('other', 'simple'): 4,
         ('difficult', 'other'): 4,
         ('hard', 'other'): 4,
         ('necessary', 'other'): 4,
         ('many', 'wrong'): 4,
         ('first', 'half'): 4,
         ('entire', 'simple'): 4,
         ('simple', 'strong'): 4,
         ('respectful', 'simple'): 4,
         ('difficult', 'first'): 4,
         ('difficult', 'good'): 4,
         ('entire', 'respectful'): 4,
         ('-', 'good'): 4,
         ('good', 'hard'): 4,
         ('good', 'responsible'): 4,
         ('interesting', 'other'): 4,
         ('great', 'impressive'): 4,
         ('best', 'only'): 4,
         ('close', 'young'): 4,
         ('best', 'serious'): 4,
         ('last', 'more'): 4,
         ('long', 'strong'): 4,
         ('same', 'strong'): 4,
         ('amazing', 'strong'): 4,
         ('best', 'wonderful'): 4,
         ('best', 'technical'): 4,
         ('Young', 'great'): 4,
         ('beautiful', 'long'): 4,
         ('dark', 'great'): 4,
         ('beautiful', 'new'): 4,
         ('amazing', 'new'): 4,
         ('charming', 'low'): 4,
         ('favorite', 'good'): 4,
         ('high', 'most'): 4,
         ('funny', 'low'): 4,
         ('endless', 'own'): 4,
         ('amusing', 'own'): 4,
         ('own', 'private'): 4,
         ('greatest', 'own'): 4,
         ('clever', 'own'): 4,
         ('better', 'own'): 4,
         ('endless', 'good'): 4,
         ('good', 'private'): 4,
         ('good', 'proper'): 4,
         ('good', 'greatest'): 4,
         ('good', 'smart'): 4,
         ('greatest', 'little'): 4,
         ('wonderful', 'younger'): 4,
         ('fresh', 'little'): 4,
         ('little', 'nice'): 4,
         ('little', 'wonderful'): 4,
         ('big', 'same'): 4,
         ('funny', 'nice'): 4,
         ('big', 'funny'): 4,
         ('good', 'short'): 4,
         ('great', 'surf'): 4,
         ('great', 'least'): 4,
         ('low', 'real'): 4,
         ('little', 'traumatic'): 4,
         ('big', 'entire'): 4,
         ('good', 'traumatic'): 4,
         ('black', 'good'): 4,
         ('good', 'white'): 4,
         ('good', 'silly'): 4,
         ('beautiful', 'special'): 4,
         ('nice', 'true'): 4,
         ('first', 'true'): 4,
         ('last', 'nice'): 4,
         ('first', 'last'): 4,
         ('good', 'original'): 4,
         ('better', 'original'): 4,
         ('better', 'nice'): 4,
         ('good', 'surprised'): 4,
         ('glad', 'good'): 4,
         ('good', 'hot'): 4,
         ('great', 'nice'): 4,
         ('own', 'true'): 4,
         ('old', 'true'): 4,
         ('absolute', 'same'): 4,
         ('same', 'silly'): 4,
         ('interesting', 'nice'): 4,
         ('first', 'much'): 4,
         ('great', 'right'): 4,
         ('own', 'right'): 4,
         ('first', 'funny'): 4,
         ('final', 'true'): 4,
         ('full', 'own'): 4,
         ('final', 'own'): 4,
         ('big', 'wise'): 4,
         ('much', 'original'): 4,
         ('first', 'original'): 4,
         ('first', 'various'): 4,
         ('first', 'high'): 4,
         ('fantastic', 'first'): 4,
         ('original', 'various'): 4,
         ('full', 'original'): 4,
         ('more', 'original'): 4,
         ('final', 'same'): 4,
         ('pure', 'same'): 4,
         ('other', 'slapstick'): 4,
         ('apparent', 'other'): 4,
         ('brilliant', 'various'): 4,
         ('fantastic', 'various'): 4,
         ('full', 'various'): 4,
         ('brilliant', 'fantastic'): 4,
         ('brilliant', 'full'): 4,
         ('fantastic', 'full'): 4,
         ('few', 'full'): 4,
         ('fresh', 'most'): 4,
         ('most', 'top'): 4,
         ('inventive', 'most'): 4,
         ('little', 'top'): 4,
         ('inventive', 'little'): 4,
         ('great', 'special'): 4,
         ('intelligent', 'other'): 4,
         ('intelligent', 'young'): 4,
         ('intelligent', 'strong'): 4,
         ('many', 'realistic'): 4,
         ('most', 'realistic'): 4,
         ('close', 'other'): 4,
         ('other', 'powerful'): 4,
         ('beautiful', 'young'): 4,
         ('strong', 'young'): 4,
         ('best', 'short'): 4,
         ('old', 'short'): 4,
         ('old', 'outstanding'): 4,
         ('powerful', 'strong'): 4,
         ('original', 'perfect'): 4,
         ('funny', 'original'): 4,
         ('great', 'whole'): 4,
         ('enough', 'real'): 4,
         ('few', 'real'): 4,
         ('new', 'only'): 4,
         ('best', 'early'): 4,
         ('old', 'pleased'): 4,
         ('pleased', 'young'): 4,
         ('best', 'pleased'): 4,
         ('greatest', 'young'): 4,
         ('old', 'personal'): 4,
         ('conflicting', 'old'): 4,
         ('further', 'old'): 4,
         ('old', 'potential'): 4,
         ('evil', 'old'): 4,
         ('dirty', 'old'): 4,
         ('old', 'visual'): 4,
         ('lasting', 'old'): 4,
         ('comic', 'old'): 4,
         ('conflicting', 'young'): 4,
         ('further', 'young'): 4,
         ('potential', 'young'): 4,
         ('evil', 'young'): 4,
         ('clean', 'young'): 4,
         ('visual', 'young'): 4,
         ('cinematic', 'young'): 4,
         ('fresh', 'young'): 4,
         ('lasting', 'young'): 4,
         ('dark', 'young'): 4,
         ('best', 'personal'): 4,
         ('best', 'conflicting'): 4,
         ('best', 'further'): 4,
         ('best', 'potential'): 4,
         ('best', 'evil'): 4,
         ('best', 'visual'): 4,
         ('best', 'lasting'): 4,
         ('bad', 'dark'): 4,
         ('Beguiled', 'bad'): 4,
         ('bad', 'sweet'): 4,
         ('bad', 'high'): 4,
         ('most', 'right'): 4,
         ('Beguiled', 'most'): 4,
         ('early', 'most'): 4,
         ('most', 'sweet'): 4,
         ('Beguiled', 'sweet'): 4,
         ('big', 'early'): 4,
         ('cheap', 'long'): 4,
         ('big', 'long'): 4,
         ('available', 'more'): 4,
         ('nice', 'sweet'): 4,
         ('good', 'subliminal'): 4,
         ('good', 'thin'): 4,
         ('good', 'scary'): 4,
         ('amusing', 'nice'): 4,
         ('nice', 'perfect'): 4,
         ('catchy', 'good'): 4,
         ('engaging', 'good'): 4,
         ('good', 'hilarious'): 4,
         ('emphatic', 'good'): 4,
         ('good', 'intelligent'): 4,
         ('good', 'scared'): 4,
         ('good', 'worthy'): 4,
         ('good', 'solid'): 4,
         ('good', 'superb'): 4,
         ('good', 'unrecognisable'): 4,
         ('perfect', 'such'): 4,
         ('many', 'several'): 4,
         ('High', 'many'): 4,
         ('amazing', 'little'): 4,
         ('good', 'serious'): 4,
         ('about', 'more'): 4,
         ('true', 'young'): 4,
         ('same', 'wild'): 4,
         ('long', 'such'): 4,
         ('old', 'rare'): 4,
         ('perfect', 'rare'): 4,
         ('great', 'rare'): 4,
         ('funny', 'rare'): 4,
         ('available', 'funny'): 4,
         ('funny', 'hysterical'): 4,
         ('next', 'such'): 4,
         ('new', 'next'): 4,
         ('difficult', 'short'): 4,
         ('right', 'short'): 4,
         ('/>My', 'good'): 4,
         ('good', 'impossible'): 4,
         ('last', 'new'): 4,
         ('older', 'true'): 4,
         ('Tomanian', 'heroic'): 4,
         ('Jewish', 'Tomanian'): 4,
         ('more', 'ridiculous'): 4,
         ('Jewish', 'heroic'): 4,
         ('entertaining', 'first'): 4,
         ('entertaining', 'worth'): 4,
         ('good', 'worth'): 4,
         ('real', 'right'): 4,
         ('old', 'real'): 4,
         ('big', 'special'): 4,
         ('early', 'such'): 4,
         ('great', 'sound'): 4,
         ('sound', 'such'): 4,
         ('greatest', 'sound'): 4,
         ('great', 'greatest'): 4,
         ('real', 'similar'): 4,
         ('next', 'real'): 4,
         ('funny', 'modern'): 4,
         ('funny', 'general'): 4,
         ('near', 'realistic'): 4,
         ('amazing', 'perfect'): 4,
         ('easy', 'perfect'): 4,
         ('perfect', 'realistic'): 4,
         ('amazing', 'easy'): 4,
         ('easy', 'realistic'): 4,
         ('comic', 'short'): 4,
         ('married', 'short'): 4,
         ('hot', 'short'): 4,
         ('comic', 'married'): 4,
         ('comic', 'hot'): 4,
         ('comic', 'first'): 4,
         ('hot', 'married'): 4,
         ('desperate', 'realistic'): 4,
         ('dated', 'realistic'): 4,
         ('desperate', 'other'): 4,
         ('desperate', 'strong'): 4,
         ('dated', 'desperate'): 4,
         ('new', 'typical'): 4,
         ('cool', 'other'): 4,
         ('dated', 'other'): 4,
         ('dated', 'strong'): 4,
         ('amazing', 'convincing'): 4,
         ('amazing', 'bad'): 4,
         ('convincing', 'other'): 4,
         ('bad', 'convincing'): 4,
         ('other', 'sexual'): 4,
         ('Christian', 'surprised'): 4,
         ('serious', 'surprised'): 4,
         ...})
In [6]:
# ネガティブレビューの共起ペア生成
cooccurrence_negative = generate_cooccurrence(review_negative['tokens'])
cooccurrence_negative
Out[6]:
Counter({('bad', 'good'): 51,
         ('bad', 'terrible'): 27,
         ('good', 'red'): 24,
         ('good', 'terrible'): 23,
         ('better', 'good'): 23,
         ('good', 'many'): 23,
         ('good', 'other'): 22,
         ('main', 'red'): 21,
         ('bad', 'same'): 20,
         ('bad', 'first'): 20,
         ('good', 'little'): 19,
         ('good', 'such'): 19,
         ('bad', 'better'): 18,
         ('funny', 'many'): 18,
         ('first', 'good'): 17,
         ('bad', 'many'): 17,
         ('bad', 'last'): 16,
         ('bad', 'other'): 16,
         ('good', 'great'): 16,
         ('good', 'stupid'): 16,
         ('evil', 'such'): 16,
         ('good', 'more'): 16,
         ('good', 'real'): 16,
         ('better', 'many'): 16,
         ('genetic', 'psychical'): 16,
         ('bad', 'little'): 15,
         ('bad', 'particular'): 15,
         ('awful', 'red'): 15,
         ('modern', 'red'): 15,
         ('bad', 'whole'): 14,
         ('bad', 'worst'): 14,
         ('good', 'same'): 14,
         ('-', 'real'): 14,
         ('good', 'least'): 14,
         ('awful', 'bad'): 14,
         ('bad', 'stupid'): 14,
         ('bad', 'new'): 14,
         ('bad', 'funny'): 14,
         ('good', 'main'): 14,
         ('big', 'red'): 14,
         ('evil', 'red'): 14,
         ('more', 'same'): 14,
         ('first', 'more'): 14,
         ('black', 'red'): 14,
         ('red', 'such'): 14,
         ('emotional', 'red'): 14,
         ('baroque', 'red'): 14,
         ('famous', 'red'): 14,
         ('good', 'young'): 14,
         ('terrible', 'worst'): 13,
         ('bad', 'interesting'): 13,
         ('good', 'last'): 13,
         ('real', 'true'): 13,
         ('big', 'good'): 13,
         ('better', 'funny'): 13,
         ('many', 'more'): 13,
         ('good', 'interesting'): 12,
         ('better', 'other'): 12,
         ('first', 'great'): 12,
         ('funny', 'good'): 12,
         ('good', 'much'): 12,
         ('awful', 'good'): 12,
         ('first', 'same'): 12,
         ('bad', 'horrible'): 12,
         ('good', 'horrible'): 12,
         ('first', 'little'): 12,
         ('new', 'same'): 12,
         ('evil', 'great'): 12,
         ('bad', 'more'): 12,
         ('evil', 'good'): 12,
         ('criminal', 'real'): 12,
         ('least', 'real'): 12,
         ('Nazi', 'real'): 12,
         ('first', 'sexy'): 12,
         ('more', 'sexy'): 12,
         ('many', 'such'): 12,
         ('good', 'hard'): 11,
         ('best', 'good'): 11,
         ('bad', 'great'): 11,
         ('first', 'sure'): 11,
         ('good', 'new'): 11,
         ('-', 'good'): 11,
         ('first', 'red'): 11,
         ('bad', 'best'): 10,
         ('bad', 'worse'): 10,
         ('old', 'other'): 10,
         ('first', 'other'): 10,
         ('only', 'special'): 10,
         ('emotional', 'good'): 10,
         ('first', 'much'): 10,
         ('good', 'special'): 10,
         ('bad', 'slow'): 10,
         ('better', 'great'): 10,
         ('ridiculous', 'such'): 10,
         ('main', 'more'): 10,
         ('main', 'same'): 10,
         ('bad', 'red'): 10,
         ('more', 'red'): 10,
         ('emotional', 'such'): 10,
         ('red', 'same'): 10,
         ('better', 'more'): 10,
         ('hard', 'terrible'): 9,
         ('better', 'terrible'): 9,
         ('good', 'worst'): 9,
         ('first', 'small'): 9,
         ('good', 'own'): 9,
         ('-', 'bad'): 9,
         ('bad', 'such'): 9,
         ('awful', 'same'): 9,
         ('other', 'same'): 9,
         ('good', 'only'): 9,
         ('bad', 'different'): 9,
         ('good', 'second'): 9,
         ('bad', 'only'): 9,
         ('little', 'only'): 9,
         ('many', 'real'): 9,
         ('horrible', 'real'): 9,
         ('great', 'many'): 9,
         ('bad', 'small'): 8,
         ('bad', 'convincing'): 8,
         ('first', 'interesting'): 8,
         ('bad', 'serious'): 8,
         ('bad', 'own'): 8,
         ('bad', 'sure'): 8,
         ('high', 'least'): 8,
         ('least', 'slow'): 8,
         ('least', 'same'): 8,
         ('first', 'many'): 8,
         ('black', 'emotional'): 8,
         ('same', 'slow'): 8,
         ('first', 'special'): 8,
         ('black', 'good'): 8,
         ('good', 'tough'): 8,
         ('first', 'last'): 8,
         ('last', 'little'): 8,
         ('big', 'much'): 8,
         ('much', 'red'): 8,
         ('last', 'main'): 8,
         ('last', 'red'): 8,
         ('big', 'main'): 8,
         ('fantastic', 'such'): 8,
         ('red', 'ridiculous'): 8,
         ('own', 'such'): 8,
         ('own', 'red'): 8,
         ('interesting', 'red'): 8,
         ('great', 'other'): 8,
         ('new', 'red'): 8,
         ('original', 'red'): 8,
         ('-', 'red'): 8,
         ('good', 'most'): 8,
         ('most', 'real'): 8,
         ('horrible', 'other'): 8,
         ('absurd', 'many'): 8,
         ('sexy', 'small'): 8,
         ('sexy', 'silly'): 8,
         ('Sunny', 'sexy'): 8,
         ('sexy', 'wounded'): 8,
         ('medical', 'sexy'): 8,
         ('azure', 'beautiful'): 8,
         ('beautiful', 'female'): 8,
         ('bad', 'hot'): 8,
         ('bad', 'clear'): 8,
         ('least', 'terrible'): 8,
         ('bad', 'insane'): 8,
         ('Spanish', 'main'): 8,
         ('main', 'pretty'): 8,
         ('genetic', 'whole'): 8,
         ('psychical', 'whole'): 8,
         ('genetic', 'psychic'): 8,
         ('psychic', 'psychical'): 8,
         ('bad', 'genetic'): 8,
         ('bad', 'psychical'): 8,
         ('terrible', 'whole'): 7,
         ('bad', 'hard'): 7,
         ('better', 'hard'): 7,
         ('better', 'interesting'): 7,
         ('last', 'same'): 7,
         ('OK', 'black'): 7,
         ('great', 'interesting'): 7,
         ('first', 'own'): 7,
         ('good', 'short'): 7,
         ('little', 'much'): 7,
         ('bad', 'familiar'): 7,
         ('Bad', 'bad'): 7,
         ('bad', 'black'): 7,
         ('lazy', 'real'): 7,
         ('first', 'high'): 7,
         ('awful', 'other'): 7,
         ('awful', 'first'): 7,
         ('awful', 'black'): 7,
         ('funny', 'other'): 7,
         ('many', 'same'): 7,
         ('first', 'only'): 7,
         ('old', 'same'): 7,
         ('first', 'slow'): 7,
         ('boring', 'first'): 7,
         ('first', 'silly'): 7,
         ('poor', 'same'): 7,
         ('awful', 'such'): 7,
         ('little', 'stupid'): 7,
         ('dimensional', 'evil'): 7,
         ('evil', 'much'): 7,
         ('evil', 'first'): 7,
         ('evil', 'horrible'): 7,
         ('20th', 'evil'): 7,
         ('evil', 'sympathetic'): 7,
         ('boring', 'great'): 7,
         ('glossy', 'red'): 7,
         ('more', 'much'): 7,
         ('impuissant', 'red'): 7,
         ('interested', 'main'): 7,
         ('interested', 'red'): 7,
         ('bizarre', 'red'): 7,
         ('red', 'very'): 7,
         ('fortunate', 'red'): 7,
         ('red', 'tenuous'): 7,
         ('red', 'tremendous'): 7,
         ('latter', 'red'): 7,
         ('general', 'main'): 7,
         ('general', 'red'): 7,
         ('average', 'red'): 7,
         ('quotidian', 'red'): 7,
         ('impressive', 'red'): 7,
         ('grotesque', 'red'): 7,
         ('fantastic', 'red'): 7,
         ('good', 'ridiculous'): 7,
         ('evil', 'main'): 7,
         ('milquetoast', 'red'): 7,
         ('final', 'red'): 7,
         ('corporate', 'red'): 7,
         ('red', 'spiritual'): 7,
         ('red', 'religious'): 7,
         ('altruistic', 'red'): 7,
         ('red', 'sympathetic'): 7,
         ('blunt', 'red'): 7,
         ('dimensional', 'red'): 7,
         ('modern', 'more'): 7,
         ('more', 'other'): 7,
         ('good', 'modern'): 7,
         ('elderly', 'red'): 7,
         ('confused', 'red'): 7,
         ('definite', 'red'): 7,
         ('indispensable', 'red'): 7,
         ('modern', 'same'): 7,
         ('funerary', 'red'): 7,
         ('red', 'suicidal'): 7,
         ('red', 'simple'): 7,
         ('red', 'second'): 7,
         ('other', 'red'): 7,
         ('appropriate', 'red'): 7,
         ('red', 'sacred'): 7,
         ('extreme', 'red'): 7,
         ('inherent', 'red'): 7,
         ('bold', 'red'): 7,
         ('receptive', 'red'): 7,
         ('indulgent', 'red'): 7,
         ('red', 'risky'): 7,
         ('red', 'vulnerable'): 7,
         ('enormous', 'red'): 7,
         ('earliest', 'red'): 7,
         ('dead', 'red'): 7,
         ('Hebrew', 'red'): 7,
         ('anachronistic', 'red'): 7,
         ('accepted', 'red'): 7,
         ('mysterious', 'red'): 7,
         ('frieze', 'red'): 7,
         ('complete', 'red'): 7,
         ('red', 'resplendent'): 7,
         ('formal', 'red'): 7,
         ('musical', 'red'): 7,
         ('quasi', 'red'): 7,
         ('great', 'red'): 7,
         ('red', 'restive'): 7,
         ('absolute', 'red'): 7,
         ('best', 'red'): 7,
         ('masterful', 'red'): 7,
         ('dramatic', 'red'): 7,
         ('poignant', 'red'): 7,
         ('alive', 'red'): 7,
         ('minor', 'red'): 7,
         ('first', 'new'): 7,
         ('brutal', 'real'): 7,
         ('big', 'least'): 7,
         ('horrible', 'many'): 7,
         ('funny', 'more'): 7,
         ('big', 'little'): 7,
         ('bad', 'pathetic'): 7,
         ('bad', 'young'): 7,
         ('bad', 'major'): 7,
         ('bad', 'perfect'): 6,
         ('terrible', 'top'): 6,
         ('good', 'top'): 6,
         ('good', 'whole'): 6,
         ('better', 'whole'): 6,
         ('good', 'small'): 6,
         ('OK', 'good'): 6,
         ('interesting', 'same'): 6,
         ('bad', 'decent'): 6,
         ('generous', 'plastic'): 6,
         ('funny', 'stupid'): 6,
         ('funny', 'plastic'): 6,
         ('convincing', 'good'): 6,
         ('funny', 'great'): 6,
         ('interesting', 'own'): 6,
         ('Original', 'bad'): 6,
         ('little', 'many'): 6,
         ('bad', 'much'): 6,
         ('bad', 'predictable'): 6,
         ('black', 'first'): 6,
         ('better', 'first'): 6,
         ('-', 'true'): 6,
         ('black', 'fair'): 6,
         ('bad', 'original'): 6,
         ('high', 'slow'): 6,
         ('OK', 'high'): 6,
         ('good', 'high'): 6,
         ('awful', 'least'): 6,
         ('least', 'only'): 6,
         ('OK', 'least'): 6,
         ('least', 'special'): 6,
         ('least', 'much'): 6,
         ('other', 'whole'): 6,
         ('many', 'whole'): 6,
         ('awful', 'emotional'): 6,
         ('awful', 'slow'): 6,
         ('only', 'other'): 6,
         ('OK', 'emotional'): 6,
         ('OK', 'slow'): 6,
         ('slow', 'special'): 6,
         ('good', 'slow'): 6,
         ('OK', 'special'): 6,
         ('OK', 'fancy'): 6,
         ('much', 'special'): 6,
         ('good', 'single'): 6,
         ('average', 'good'): 6,
         ('much', 'same'): 6,
         ('black', 'same'): 6,
         ('first', 'such'): 6,
         ('awful', 'terrible'): 6,
         ('bad', 'female'): 6,
         ('few', 'good'): 6,
         ('second', 'stupid'): 6,
         ('bad', 'second'): 6,
         ('first', 'second'): 6,
         ('little', 'second'): 6,
         ('bad', 'single'): 6,
         ('bad', 'special'): 6,
         ('first', 'tough'): 6,
         ('first', 'horrible'): 6,
         ('little', 'special'): 6,
         ('many', 'new'): 6,
         ('infamous', 'same'): 6,
         ('great', 'much'): 6,
         ('first', 'ridiculous'): 6,
         ('bad', 'entire'): 6,
         ('new', 'own'): 6,
         ('great', 'new'): 6,
         ('much', 'such'): 6,
         ('awful', 'main'): 6,
         ('big', 'such'): 6,
         ('bad', 'latter'): 6,
         ('black', 'main'): 6,
         ('main', 'modern'): 6,
         ('main', 'such'): 6,
         ('emotional', 'main'): 6,
         ('baroque', 'main'): 6,
         ('famous', 'main'): 6,
         ('emotional', 'evil'): 6,
         ('dead', 'good'): 6,
         ('baroque', 'good'): 6,
         ('famous', 'good'): 6,
         ('new', 'other'): 6,
         ('personal', 'real'): 6,
         ('enjoyment', 'real'): 6,
         ('good', 'successful'): 6,
         ('awful', 'real'): 6,
         ('miserable', 'real'): 6,
         ('least', 'true'): 6,
         ('real', 'short'): 6,
         ('real', 'successful'): 6,
         ('alcoholic', 'real'): 6,
         ('nice', 'real'): 6,
         ('high', 'real'): 6,
         ('/>These', 'real'): 6,
         ('beautiful', 'real'): 6,
         ('indulgent', 'real'): 6,
         ('narcissistic', 'real'): 6,
         ('big', 'real'): 6,
         ('beefy', 'real'): 6,
         ('real', 'skinny'): 6,
         ('early', 'real'): 6,
         ('Many', 'real'): 6,
         ('real', 'vulgar'): 6,
         ('larger', 'real'): 6,
         ('dumb', 'real'): 6,
         ('real', 'special'): 6,
         ('real', 'stupid'): 6,
         ('dead', 'real'): 6,
         ('lousy', 'real'): 6,
         ('-', 'least'): 6,
         ('beautiful', 'many'): 6,
         ('most', 'same'): 6,
         ('absurd', 'funny'): 6,
         ('absurd', 'better'): 6,
         ('special', 'such'): 6,
         ('bad', 'most'): 6,
         ('least', 'worst'): 6,
         ('more', 'small'): 6,
         ('Sunny', 'first'): 6,
         ('first', 'wounded'): 6,
         ('first', 'medical'): 6,
         ('more', 'silly'): 6,
         ('obvious', 'sexy'): 6,
         ('Sunny', 'more'): 6,
         ('bad', 'sad'): 6,
         ('more', 'wounded'): 6,
         ('more', 'sure'): 6,
         ('little', 'more'): 6,
         ('little', 'same'): 6,
         ('medical', 'more'): 6,
         ('good', 'wrong'): 6,
         ('hard', 'other'): 6,
         ('last', 'many'): 6,
         ('successful', 'young'): 6,
         ('many', 'young'): 6,
         ('boring', 'violent'): 6,
         ('clear', 'violent'): 6,
         ('bad', 'blue'): 6,
         ('friendly', 'violent'): 6,
         ('main', 'many'): 6,
         ('double', 'good'): 6,
         ('bad', 'surprised'): 6,
         ('evil', 'many'): 6,
         ('professional', 'terrible'): 6,
         ('such', 'wicked'): 6,
         ('equivalent', 'such'): 6,
         ('20th', 'such'): 6,
         ('such', 'sweet'): 6,
         ('human', 'such'): 6,
         ('pure', 'such'): 6,
         ('daily', 'such'): 6,
         ('constant', 'such'): 6,
         ('cruel', 'such'): 6,
         ('monstrous', 'such'): 6,
         ('insane', 'such'): 6,
         ('preposterous', 'such'): 6,
         ('ill', 'such'): 6,
         ('lost', 'such'): 6,
         ('Sane', 'such'): 6,
         ('addicted', 'such'): 6,
         ('real', 'such'): 6,
         ('horrible', 'such'): 6,
         ('legal', 'such'): 6,
         ('such', 'vast'): 6,
         ('deceitful', 'such'): 6,
         ('selfish', 'such'): 6,
         ('less', 'such'): 6,
         ('civil', 'such'): 6,
         ('rational', 'such'): 6,
         ('concerned', 'such'): 6,
         ('such', 'vile'): 6,
         ('intellectual', 'such'): 6,
         ('brutal', 'such'): 6,
         ('perfect', 'terrible'): 5,
         ('small', 'terrible'): 5,
         ('same', 'terrible'): 5,
         ('hard', 'worst'): 5,
         ('interesting', 'whole'): 5,
         ('better', 'worst'): 5,
         ('good', 'less'): 5,
         ('best', 'better'): 5,
         ('better', 'same'): 5,
         ('black', 'funny'): 5,
         ('OK', 'funny'): 5,
         ('funny', 'old'): 5,
         ('good', 'proper'): 5,
         ('good', 'worth'): 5,
         ('first', 'funny'): 5,
         ('complete', 'good'): 5,
         ('good', 'particular'): 5,
         ('bad', 'lousy'): 5,
         ('bad', 'cheesy'): 5,
         ('bad', 'super'): 5,
         ('bad', 'real'): 5,
         ('interesting', 'sure'): 5,
         ('bad', 'old'): 5,
         ('gay', 'old'): 5,
         ('short', 'special'): 5,
         ('high', 'old'): 5,
         ('high', 'special'): 5,
         ('high', 'same'): 5,
         ('special', 'top'): 5,
         ('awful', 'whole'): 5,
         ('predictable', 'whole'): 5,
         ('first', 'whole'): 5,
         ('many', 'other'): 5,
         ('black', 'other'): 5,
         ('many', 'subtle'): 5,
         ('boring', 'many'): 5,
         ('much', 'only'): 5,
         ('first', 'old'): 5,
         ('black', 'slow'): 5,
         ('first', 'poor'): 5,
         ('good', 'odd'): 5,
         ('decent', 'much'): 5,
         ('black', 'special'): 5,
         ('awful', 'nice'): 5,
         ('bad', 'embarrassing'): 5,
         ('bad', 'easy'): 5,
         ('impossible', 'terrible'): 5,
         ('basic', 'same'): 5,
         ('different', 'same'): 5,
         ('few', 'predictable'): 5,
         ('last', 'second'): 5,
         ('special', 'stupid'): 5,
         ('horrible', 'stupid'): 5,
         ('bad', 'tough'): 5,
         ('good', 'obvious'): 5,
         ('little', 'tough'): 5,
         ('ridiculous', 'same'): 5,
         ('same', 'unlikely'): 5,
         ('evil', 'simplistic'): 5,
         ('deep', 'evil'): 5,
         ('boring', 'evil'): 5,
         ('confusing', 'evil'): 5,
         ('evil', 'inept'): 5,
         ('evil', 'phsycotic'): 5,
         ('great', 'little'): 5,
         ('great', 'main'): 5,
         ('interesting', 'new'): 5,
         ('long', 'more'): 5,
         ('main', 'much'): 5,
         ('big', 'last'): 5,
         ('last', 'new'): 5,
         ('awful', 'big'): 5,
         ('bad', 'big'): 5,
         ('big', 'more'): 5,
         ('big', 'interesting'): 5,
         ('big', 'other'): 5,
         ('big', 'first'): 5,
         ('fantastic', 'good'): 5,
         ('main', 'other'): 5,
         ('interesting', 'more'): 5,
         ('great', 'more'): 5,
         ('good', 'indulgent'): 5,
         ('dramatic', 'good'): 5,
         ('black', 'such'): 5,
         ('black', 'great'): 5,
         ('first', 'modern'): 5,
         ('original', 'same'): 5,
         ('good', 'true'): 5,
         ('beautiful', 'nice'): 5,
         ('least', 'most'): 5,
         ('Many', 'many'): 5,
         ('horrible', 'more'): 5,
         ('best', 'many'): 5,
         ('many', 'negative'): 5,
         ('many', 'obnoxious'): 5,
         ('great', 'old'): 5,
         ('only', 'such'): 5,
         ('big', 'only'): 5,
         ('first', 'most'): 5,
         ('dead', 'young'): 5,
         ('first', 'particular'): 5,
         ('more', 'tough'): 5,
         ('long', 'sexy'): 5,
         ('good', 'impossible'): 5,
         ('first', 'typical'): 5,
         ('bad', 'boring'): 5,
         ('better', 'boring'): 5,
         ('better', 'few'): 5,
         ('least', 'little'): 5,
         ('adequate', 'bad'): 5,
         ('bad', 'uncomprehended'): 5,
         ('bad', 'pastel'): 5,
         ('bad', 'coloured'): 5,
         ('bad', 'physical'): 5,
         ('bad', 'psychological'): 5,
         ('bad', 'tacky'): 5,
         ('bad', 'hilarious'): 5,
         ('bad', 'bankrupt'): 5,
         ('bad', 'top'): 4,
         ('OK', 'top'): 4,
         ('best', 'terrible'): 4,
         ('less', 'terrible'): 4,
         ('interesting', 'terrible'): 4,
         ('hard', 'same'): 4,
         ('bad', 'racist'): 4,
         ('bad', 'less'): 4,
         ('OK', 'bad'): 4,
         ('OK', 'whole'): 4,
         ('same', 'whole'): 4,
         ('same', 'worst'): 4,
         ('same', 'small'): 4,
         ('better', 'last'): 4,
         ('OK', 'same'): 4,
         ('irish', 'oirish'): 4,
         ('funny', 'generous'): 4,
         ('OK', 'old'): 4,
         ('good', 'mediocre'): 4,
         ('bad', 'worth'): 4,
         ('slow', 'wooden'): 4,
         ('great', 'own'): 4,
         ('complete', 'great'): 4,
         ('complete', 'first'): 4,
         ('funny', 'own'): 4,
         ('good', 'next'): 4,
         ('many', 'particular'): 4,
         ('many', 'much'): 4,
         ('familiar', 'many'): 4,
         ('best', 'full'): 4,
         ('bad', 'worthy'): 4,
         ('bad', 'right'): 4,
         ('bad', 'next'): 4,
         ('bleak', 'sloppy'): 4,
         ('-', 'black'): 4,
         ('better', 'sure'): 4,
         ('better', 'common'): 4,
         ('black', 'full'): 4,
         ('hard', 'old'): 4,
         ('old', 'such'): 4,
         ('least', 'short'): 4,
         ('awful', 'high'): 4,
         ('emotional', 'high'): 4,
         ('high', 'silly'): 4,
         ('fancy', 'high'): 4,
         ('black', 'high'): 4,
         ('high', 'poor'): 4,
         ('least', 'top'): 4,
         ('only', 'top'): 4,
         ('least', 'other'): 4,
         ('least', 'many'): 4,
         ('least', 'old'): 4,
         ('emotional', 'least'): 4,
         ('funny', 'least'): 4,
         ('first', 'least'): 4,
         ('fancy', 'least'): 4,
         ('black', 'least'): 4,
         ('least', 'poor'): 4,
         ('funny', 'whole'): 4,
         ('slow', 'whole'): 4,
         ('special', 'whole'): 4,
         ('right', 'slow'): 4,
         ('right', 'same'): 4,
         ('emotional', 'grotesque'): 4,
         ('good', 'grotesque'): 4,
         ('black', 'grotesque'): 4,
         ('emotional', 'other'): 4,
         ('other', 'slow'): 4,
         ('odd', 'other'): 4,
         ('much', 'other'): 4,
         ('emotional', 'many'): 4,
         ('many', 'special'): 4,
         ('funny', 'subtle'): 4,
         ('old', 'slow'): 4,
         ('good', 'old'): 4,
         ('predictable', 'slow'): 4,
         ('good', 'predictable'): 4,
         ('poor', 'predictable'): 4,
         ('emotional', 'slow'): 4,
         ('emotional', 'first'): 4,
         ('emotional', 'special'): 4,
         ('emotional', 'interested'): 4,
         ('average', 'emotional'): 4,
         ('emotional', 'fancy'): 4,
         ('emotional', 'much'): 4,
         ('emotional', 'same'): 4,
         ('funny', 'slow'): 4,
         ('boring', 'funny'): 4,
         ('funny', 'same'): 4,
         ('boring', 'slow'): 4,
         ('fancy', 'slow'): 4,
         ('poor', 'slow'): 4,
         ('decent', 'good'): 4,
         ('boring', 'special'): 4,
         ('single', 'special'): 4,
         ('fancy', 'special'): 4,
         ('same', 'special'): 4,
         ('good', 'interested'): 4,
         ('black', 'interested'): 4,
         ('boring', 'much'): 4,
         ('boring', 'good'): 4,
         ('black', 'boring'): 4,
         ('dull', 'good'): 4,
         ('black', 'single'): 4,
         ('good', 'silly'): 4,
         ('cheap', 'good'): 4,
         ('average', 'black'): 4,
         ('black', 'fancy'): 4,
         ('black', 'much'): 4,
         ('cool', 'good'): 4,
         ('disappointing', 'same'): 4,
         ('giant', 'nice'): 4,
         ('bad', 'nice'): 4,
         ('nice', 'terrible'): 4,
         ('nice', 'whole'): 4,
         ('female', 'nice'): 4,
         ('awful', 'extreme'): 4,
         ('other', 'such'): 4,
         ('other', 'terrible'): 4,
         ('basic', 'other'): 4,
         ('other', 'wrong'): 4,
         ('sure', 'whole'): 4,
         ('awful', 'final'): 4,
         ('awful', 'giant'): 4,
         ('bad', 'giant'): 4,
         ('giant', 'terrible'): 4,
         ('giant', 'whole'): 4,
         ('able', 'bad'): 4,
         ('bad', 'impossible'): 4,
         ('bad', 'wrong'): 4,
         ('female', 'terrible'): 4,
         ('terrible', 'wrong'): 4,
         ('female', 'whole'): 4,
         ('same', 'wrong'): 4,
         ('great', 'last'): 4,
         ('first', 'stupid'): 4,
         ('stupid', 'tough'): 4,
         ('single', 'stupid'): 4,
         ('only', 'stupid'): 4,
         ('stupid', 'uninspired'): 4,
         ('bright', 'good'): 4,
         ('bright', 'first'): 4,
         ('first', 'obvious'): 4,
         ('last', 'only'): 4,
         ('best', 'horrible'): 4,
         ('better', 'big'): 4,
         ('infamous', 'new'): 4,
         ('many', 'ridiculous'): 4,
         ('first', 'willing'): 4,
         ('entire', 'little'): 4,
         ('funny', 'horrible'): 4,
         ('Terrible', 'good'): 4,
         ('good', 'ill'): 4,
         ('interesting', 'main'): 4,
         ('cheap', 'great'): 4,
         ('average', 'main'): 4,
         ('better', 'gory'): 4,
         ('absurd', 'great'): 4,
         ('absurd', 'good'): 4,
         ('better', 'bitter'): 4,
         ('interesting', 'much'): 4,
         ('last', 'more'): 4,
         ('famous', 'last'): 4,
         ('awful', 'evil'): 4,
         ('awful', 'own'): 4,
         ('awful', 'interesting'): 4,
         ('awful', 'modern'): 4,
         ('awful', 'new'): 4,
         ('-', 'awful'): 4,
         ('awful', 'baroque'): 4,
         ('awful', 'famous'): 4,
         ('big', 'evil'): 4,
         ('big', 'black'): 4,
         ('big', 'modern'): 4,
         ('big', 'emotional'): 4,
         ('big', 'same'): 4,
         ('-', 'big'): 4,
         ('baroque', 'big'): 4,
         ('big', 'famous'): 4,
         ('modern', 'tenuous'): 4,
         ('good', 'latter'): 4,
         ('evil', 'fantastic'): 4,
         ('evil', 'ridiculous'): 4,
         ('more', 'ridiculous'): 4,
         ('bad', 'main'): 4,
         ('first', 'main'): 4,
         ('evil', 'own'): 4,
         ('black', 'evil'): 4,
         ('evil', 'modern'): 4,
         ('baroque', 'evil'): 4,
         ('evil', 'famous'): 4,
         ('more', 'new'): 4,
         ('anachronistic', 'good'): 4,
         ('good', 'musical'): 4,
         ('own', 'same'): 4,
         ('interesting', 'such'): 4,
         ('black', 'modern'): 4,
         ('baroque', 'black'): 4,
         ('black', 'famous'): 4,
         ('modern', 'such'): 4,
         ('emotional', 'modern'): 4,
         ('baroque', 'modern'): 4,
         ('famous', 'modern'): 4,
         ('baroque', 'such'): 4,
         ('great', 'such'): 4,
         ('famous', 'such'): 4,
         ('minor', 'such'): 4,
         ('complete', 'other'): 4,
         ('best', 'other'): 4,
         ('baroque', 'emotional'): 4,
         ('emotional', 'famous'): 4,
         ('great', 'same'): 4,
         ('-', 'dead'): 4,
         ('first', 'original'): 4,
         ('famous', 'first'): 4,
         ('baroque', 'famous'): 4,
         ('good', 'personal'): 4,
         ('criminal', 'good'): 4,
         ('good', 'nice'): 4,
         ('Nazi', 'good'): 4,
         ('brutal', 'good'): 4,
         ('awful', 'stupid'): 4,
         ('criminal', 'true'): 4,
         ('big', 'true'): 4,
         ('Nazi', 'true'): 4,
         ('criminal', 'least'): 4,
         ('-', 'criminal'): 4,
         ('Nazi', 'criminal'): 4,
         ('least', 'vulgar'): 4,
         ('Nazi', 'least'): 4,
         ('big', 'special'): 4,
         ('horrible', 'most'): 4,
         ('-', 'Nazi'): 4,
         ('more', 'poor'): 4,
         ('modern', 'unfunny'): 4,
         ('same', 'unfunny'): 4,
         ('horrible', 'same'): 4,
         ('great', 'whole'): 4,
         ('many', 'plain'): 4,
         ('absurd', 'more'): 4,
         ('common', 'many'): 4,
         ('hotter', 'many'): 4,
         ('absurdist', 'many'): 4,
         ('fantastical', 'many'): 4,
         ('many', 'skeptical'): 4,
         ('critical', 'many'): 4,
         ('bitter', 'many'): 4,
         ('many', 'unaired'): 4,
         ('correct', 'many'): 4,
         ('many', 'psychotic'): 4,
         ('better', 'negative'): 4,
         ('good', 'negative'): 4,
         ('Other', 'same'): 4,
         ('repetitive', 'same'): 4,
         ('first', 'long'): 4,
         ('popular', 'special'): 4,
         ('ridiculous', 'special'): 4,
         ('bad', 'least'): 4,
         ('bad', 'yellow'): 4,
         ('bad', 'poor'): 4,
         ('funny', 'new'): 4,
         ('funny', 'worst'): 4,
         ('same', 'yellow'): 4,
         ('least', 'red'): 4,
         ('least', 'yellow'): 4,
         ('last', 'least'): 4,
         ('slow', 'yellow'): 4,
         ('poor', 'young'): 4,
         ('dead', 'poor'): 4,
         ('first', 'full'): 4,
         ('possible', 'terrible'): 4,
         ('sexy', 'tragic'): 4,
         ('high', 'sexy'): 4,
         ('silly', 'small'): 4,
         ('Sunny', 'small'): 4,
         ('small', 'wounded'): 4,
         ('medical', 'small'): 4,
         ('much', 'sexy'): 4,
         ('much', 'sure'): 4,
         ('sexy', 'tough'): 4,
         ('Sunny', 'silly'): 4,
         ('silly', 'wounded'): 4,
         ('medical', 'silly'): 4,
         ('modern', 'sexy'): 4,
         ('femme', 'sexy'): 4,
         ('fatal', 'sexy'): 4,
         ('bad', 'sexy'): 4,
         ('sexy', 'sure'): 4,
         ('good', 'sexy'): 4,
         ('fresh', 'sexy'): 4,
         ('little', 'sexy'): 4,
         ('poor', 'sexy'): 4,
         ('classic', 'sexy'): 4,
         ('Double', 'sexy'): 4,
         ('bright', 'sexy'): 4,
         ('devious', 'sexy'): 4,
         ('particular', 'sexy'): 4,
         ('compelling', 'sexy'): 4,
         ('same', 'sexy'): 4,
         ('rural', 'sexy'): 4,
         ('implausible', 'sexy'): 4,
         ('ridiculous', 'sexy'): 4,
         ('sad', 'sexy'): 4,
         ('sexy', 'stereotypical'): 4,
         ('Hispanic', 'sexy'): 4,
         ('sexy', 'unintentional'): 4,
         ('red', 'sexy'): 4,
         ('Sunny', 'wounded'): 4,
         ('Sunny', 'medical'): 4,
         ('medical', 'wounded'): 4,
         ('good', 'sure'): 4,
         ('more', 'particular'): 4,
         ('beautiful', 'much'): 4,
         ('much', 'nice'): 4,
         ('Mediterrenean', 'beautiful'): 4,
         ('beautiful', 'lovely'): 4,
         ('beautiful', 'romantic'): 4,
         ('beautiful', 'convincing'): 4,
         ('beautiful', 'egomaniac'): 4,
         ('beautiful', 'bent'): 4,
         ('beautiful', 'vulnerable'): 4,
         ('beautiful', 'familiar'): 4,
         ('beautiful', 'macho'): 4,
         ('beautiful', 'obnoxious'): 4,
         ('beautiful', 'difficult'): 4,
         ('beautiful', 'central'): 4,
         ('beautiful', 'mediocre'): 4,
         ('azure', 'female'): 4,
         ('convincing', 'many'): 4,
         ('little', 'other'): 4,
         ('little', 'new'): 4,
         ('little', 'most'): 4,
         ('catholic', 'other'): 4,
         ('catholic', 'young'): 4,
         ('catholic', 'good'): 4,
         ('catholic', 'tough'): 4,
         ('other', 'young'): 4,
         ('other', 'tough'): 4,
         ('most', 'other'): 4,
         ('better', 'new'): 4,
         ('hard', 'young'): 4,
         ('hard', 'impossible'): 4,
         ('more', 'young'): 4,
         ('tough', 'young'): 4,
         ('same', 'young'): 4,
         ('broad', 'good'): 4,
         ('-', 'terrible'): 4,
         ('funny', 'smart'): 4,
         ('bad', 'typical'): 4,
         ('bad', 'full'): 4,
         ('bad', 'close'): 4,
         ('bad', 'technical'): 4,
         ('close', 'good'): 4,
         ('little', 'typical'): 4,
         ('many', 'typical'): 4,
         ('many', 'sure'): 4,
         ('last', 'typical'): 4,
         ('clear', 'first'): 4,
         ('much', 'successful'): 4,
         ('vampire', 'whole'): 4,
         ('few', 'other'): 4,
         ('angry', 'young'): 4,
         ('unrelenting', 'young'): 4,
         ('closest', 'young'): 4,
         ('rundown', 'young'): 4,
         ('usual', 'young'): 4,
         ('personal', 'young'): 4,
         ('entrenched', 'young'): 4,
         ('older', 'young'): 4,
         ('experienced', 'young'): 4,
         ('dramatic', 'young'): 4,
         ('overall', 'young'): 4,
         ('complex', 'young'): 4,
         ('only', 'worst'): 4,
         ('few', 'funny'): 4,
         ('boring', 'clear'): 4,
         ('boring', 'friendly'): 4,
         ('clear', 'friendly'): 4,
         ('big', 'few'): 4,
         ('corrupt', 'good'): 4,
         ('double', 'many'): 4,
         ('corrupt', 'double'): 4,
         ('corrupt', 'many'): 4,
         ('Other', 'honorable'): 4,
         ('beggar', 'honorable'): 4,
         ('honorable', 'hungry'): 4,
         ('Other', 'beggar'): 4,
         ('Other', 'hungry'): 4,
         ('beggar', 'hungry'): 4,
         ('next', 'same'): 4,
         ('bad', 'weird'): 4,
         ('only', 'terrible'): 4,
         ('professional', 'worst'): 4,
         ('good', 'professional'): 4,
         ('least', 'professional'): 4,
         ('hard', 'professional'): 4,
         ('constant', 'good'): 4,
         ('hard', 'least'): 4,
         ('good', 'pure'): 4,
         ('basic', 'big'): 4,
         ('basic', 'little'): 4,
         ('basic', 'much'): 4,
         ('basic', 'least'): 4,
         ('convincing', 'interesting'): 4,
         ('convincing', 'first'): 4,
         ('convincing', 'particular'): 4,
         ('convincing', 'sure'): 4,
         ('hard', 'main'): 4,
         ('Spanish', 'pretty'): 4,
         ('main', 'middle'): 4,
         ('main', 'popular'): 4,
         ('certain', 'main'): 4,
         ('main', 'ready'): 4,
         ('main', 'mixed'): 4,
         ('gratuitous', 'main'): 4,
         ('main', 'vital'): 4,
         ('impetuous', 'main'): 4,
         ('boorish', 'main'): 4,
         ('main', 'military'): 4,
         ...})
In [7]:
# 上位30件の共起ペアを抽出
top_positive_pairs = cooccurrence_positive.most_common(30)
top_negative_pairs = cooccurrence_negative.most_common(30)
In [ ]:
import csv
from pyvis.network import Network
from collections import defaultdict

def visualize_cooccurrence_with_node_size(top_pairs, tokens_list, title):
    """
    共起ペアをネットワークとして可視化し、ノードとエッジをCSVに出力する関数。
    ノードのサイズは単語の出現回数に基づいて設定。
    """
    net = Network(height="750px", width="100%", notebook=True)
    net.force_atlas_2based()  # レイアウト設定

    # 単語の出現回数を計算
    word_counts = defaultdict(int)
    for tokens in tokens_list:
        for token in tokens:
            word_counts[token] += 1

    nodes = set()  # ノードを管理するセット
    edges = []     # エッジを管理するリスト

    # ノードとエッジを追加
    for (word1, word2), weight in top_pairs:
        # ノードを追加(重複を避ける)
        if word1 not in nodes:
            net.add_node(word1, label=word1, size=word_counts[word1]) 
            nodes.add(word1)
        if word2 not in nodes:
            net.add_node(word2, label=word2, size=word_counts[word2])
            nodes.add(word2)
        # エッジを追加
        edges.append((word1, word2, weight))
        net.add_edge(word1, word2, value=weight)

    # ノードをCSVに出力
    nodes_csv = f"nodes_{title}.csv"
    with open(nodes_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['word', 'size'])  # ヘッダー
        for node in nodes:
            writer.writerow([node, word_counts[node]])  # ノードを保存
    print(f"Nodes saved as {nodes_csv}")

    # エッジをCSVに出力
    edges_csv = f"edges_{title}.csv"
    with open(edges_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['word1', 'word2', 'weight'])  # ヘッダー
        writer.writerows(edges)  # エッジを保存
    print(f"Edges saved as {edges_csv}")

    # HTMLファイルとして保存して表示
    output_file = f"{title}.html"
    net.show(output_file)
    print(f"Visualization saved as {output_file}")
In [25]:
# ポジティブレビューの共起ネットワークを可視化
visualize_cooccurrence_with_node_size(
    top_positive_pairs, 
    review_positive['tokens'], 
    "positive_cooccurrence_adjectives"
)
Warning: When  cdn_resources is 'local' jupyter notebook has issues displaying graphics on chrome/safari. Use cdn_resources='in_line' or cdn_resources='remote' if you have issues viewing graphics in a notebook.
Nodes saved as nodes_positive_cooccurrence_adjectives.csv
Edges saved as edges_positive_cooccurrence_adjectives.csv
positive_cooccurrence_adjectives.html
Visualization saved as positive_cooccurrence_adjectives.html
In [27]:
# ネガティブレビューの共起ネットワークを可視化
visualize_cooccurrence_with_node_size(
    top_negative_pairs, 
    review_negative['tokens'], 
    "negative_cooccurrence_adjectives"
)
Warning: When  cdn_resources is 'local' jupyter notebook has issues displaying graphics on chrome/safari. Use cdn_resources='in_line' or cdn_resources='remote' if you have issues viewing graphics in a notebook.
Nodes saved as nodes_negative_cooccurrence_adjectives.csv
Edges saved as edges_negative_cooccurrence_adjectives.csv
negative_cooccurrence_adjectives.html
Visualization saved as negative_cooccurrence_adjectives.html
In [ ]: