It's still rough, but here's what I did.
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# raw is assumed to be a pandas DataFrame with 'review' and 'label' columns, loaded earlier.
# Keep only Korean characters, digits, and spaces, then drop duplicate reviews.
raw['review'] = raw['review'].str.replace('[^ㄱ-ㅎㅏ-ㅣ가-힣0-9 ]', '', regex=True)
raw.drop_duplicates(subset=['review'], inplace=True)

# Character-level tokenizer with an OOV token for characters not seen during fitting.
unique_review = raw['review'].tolist()
tokenizer = Tokenizer(char_level=True, oov_token='<OOV>')
tokenizer.fit_on_texts(unique_review)
train_seq = tokenizer.texts_to_sequences(unique_review)
Y = raw['label'].tolist()

# Review lengths, handy for sanity-checking the maxlen choice below.
raw['length'] = raw['review'].str.len()

# Pad/truncate every sequence to 100 characters.
X = pad_sequences(train_seq, maxlen=100, padding='pre')

# 80/20 train/validation split; labels converted to float32 for binary cross-entropy.
trainX, valX, trainY, valY = train_test_split(X, Y, test_size=0.2, random_state=42)
trainY = np.array(trainY, dtype=np.float32)
valY = np.array(valY, dtype=np.float32)

# Character embedding -> bidirectional LSTM -> single sigmoid unit for binary classification.
model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(100,)),
    tf.keras.layers.Embedding(len(tokenizer.word_index) + 1, 16),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128)),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.fit(
    trainX, trainY,
    epochs=5,
    batch_size=64,
    validation_data=(valX, valY)
)
model.summary()
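
In case it helps, here's a small extra sketch (not part of the original code) showing how the trained model could score a new review, reusing the same tokenizer and 100-character padding. The sample sentence and the assumption that label 1 means a positive review are just mine for illustration.

# Extra usage sketch (assumption: label 1 = positive review; sample text is made up).
loss, acc = model.evaluate(valX, valY)               # accuracy on the validation split
new_review = ['배송도 빠르고 품질도 좋아요']
new_seq = tokenizer.texts_to_sequences(new_review)   # same character-level tokenizer as training
new_X = pad_sequences(new_seq, maxlen=100, padding='pre')
prob = model.predict(new_X)[0][0]                    # sigmoid output in [0, 1]
print('positive' if prob >= 0.5 else 'negative', prob)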