From 6e838ddfd53a6e162c177be8d67a9aeef61fbf54 Mon Sep 17 00:00:00 2001
From: jung-geun
Date: Fri, 20 Oct 2023 05:47:25 +0900
Subject: [PATCH] 23-10-20 Apply particle reset on premature convergence; fix
 model initialization (pure random values cause many problems); shuffle
 mini-batches automatically on initialization; reset negative particles at a
 set threshold
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 fashion_mnist.py    | 13 +++---
 fashion_mnist_tf.py | 10 ++---
 mnist.py            | 34 ++++++----------
 pso/__init__.py     |  2 +-
 pso/optimizer.py    | 84 +++++++++++++++++++++++++++------------
 pso/particle.py     | 96 ++++++++++++++++++++++++++++++++++++++-------
 setup.py            |  1 -
 7 files changed, 167 insertions(+), 73 deletions(-)

diff --git a/fashion_mnist.py b/fashion_mnist.py
index 6b37d5a..1da433b 100644
--- a/fashion_mnist.py
+++ b/fashion_mnist.py
@@ -39,11 +39,12 @@ def make_model():
         Conv2D(32, kernel_size=(5, 5), activation="sigmoid", input_shape=(28, 28, 1))
     )
-    model.add(MaxPooling2D(pool_size=(3, 3)))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
     model.add(Conv2D(64, kernel_size=(3, 3), activation="sigmoid"))
     model.add(MaxPooling2D(pool_size=(2, 2)))
-    model.add(Dropout(0.25))
     model.add(Flatten())
+    model.add(Dropout(0.25))
+
+    model.add(Dense(256, activation="sigmoid"))
     model.add(Dense(128, activation="sigmoid"))
     model.add(Dense(10, activation="softmax"))
 
@@ -97,8 +98,10 @@ pso_mnist = optimizer(
     w_max=0.5,
     negative_swarm=0.05,
     mutation_swarm=0.3,
-    particle_min=-4,
-    particle_max=4,
+    particle_min=-0.3,
+    particle_max=0.3,
+    convergence_reset=True,
+    convergence_reset_patience=10,
 )
 
 best_score = pso_mnist.fit(
@@ -113,7 +116,7 @@ best_score = pso_mnist.fit(
     check_point=25,
     empirical_balance=False,
     dispersion=False,
-    batch_size=32,
+    batch_size=1024,
 )
 
 print("Done!")
diff --git a/fashion_mnist_tf.py b/fashion_mnist_tf.py
index 29337a9..988d79e 100644
--- a/fashion_mnist_tf.py
+++ b/fashion_mnist_tf.py
@@ -74,15 +74,15 @@ class _batch_generator:
 def make_model():
     model = Sequential()
     model.add(
-        Conv2D(32, kernel_size=(5, 5), activation="relu",
+        Conv2D(32, kernel_size=(5, 5), activation="sigmoid",
               input_shape=(28, 28, 1))
     )
-    model.add(MaxPooling2D(pool_size=(3, 3)))
-    model.add(Conv2D(64, kernel_size=(3, 3), activation="relu"))
     model.add(MaxPooling2D(pool_size=(2, 2)))
-    model.add(Dropout(0.25))
+    model.add(Conv2D(64, kernel_size=(3, 3), activation="sigmoid"))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
     model.add(Flatten())
-    model.add(Dense(256, activation="relu"))
+    model.add(Dropout(0.25))
+
+    model.add(Dense(128, activation="sigmoid"))
     model.add(Dense(10, activation="softmax"))
 
     return model
diff --git a/mnist.py b/mnist.py
index 60317a8..cd5961e 100644
--- a/mnist.py
+++ b/mnist.py
@@ -33,34 +33,18 @@ def get_data():
     return x_train, y_train, x_test, y_test
 
 
-def get_data_test():
-    (x_train, y_train), (x_test, y_test) = mnist.load_data()
-    x_test = x_test / 255.0
-    x_test = x_test.reshape((10000, 28, 28, 1))
-
-    y_train, y_test = tf.one_hot(y_train, 10), tf.one_hot(y_test, 10)
-
-    x_train, x_test = tf.convert_to_tensor(
-        x_train), tf.convert_to_tensor(x_test)
-    y_train, y_test = tf.convert_to_tensor(
-        y_train), tf.convert_to_tensor(y_test)
-
-    print(f"x_test : {x_test[0].shape} | y_test : {y_test[0].shape}")
-
-    return x_test, y_test
-
-
 def make_model():
     model = Sequential()
     model.add(
         Conv2D(32, kernel_size=(5, 5), activation="sigmoid",
                input_shape=(28, 28, 1))
     )
-    model.add(MaxPooling2D(pool_size=(3, 3)))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
     model.add(Conv2D(64, kernel_size=(3, 3), activation="sigmoid"))
     model.add(MaxPooling2D(pool_size=(2, 2)))
-    model.add(Dropout(0.25))
     model.add(Flatten())
+    model.add(Dropout(0.25))
+
+    model.add(Dense(256, activation="sigmoid"))
     model.add(Dense(128, activation="sigmoid"))
     model.add(Dense(10, activation="softmax"))
 
@@ -107,15 +91,19 @@ loss = [
 pso_mnist = optimizer(
     model,
     loss="mean_squared_error",
-    n_particles=600,
+    n_particles=900,
     c0=0.2,
     c1=0.4,
     w_min=0.3,
     w_max=0.5,
     negative_swarm=0.05,
     mutation_swarm=0.3,
-    particle_min=-4,
-    particle_max=4,
+    particle_min=-0.3,
+    particle_max=0.3,
+    convergence_reset=True,
+    convergence_reset_patience=10,
+    convergence_reset_monitor="loss",
+    convergence_reset_min_delta=0.0005,
 )
 
 best_score = pso_mnist.fit(
@@ -130,7 +118,7 @@ best_score = pso_mnist.fit(
     check_point=25,
     empirical_balance=False,
     dispersion=False,
-    batch_size=32,
+    batch_size=1024,
 )
 
 print("Done!")
diff --git a/pso/__init__.py b/pso/__init__.py
index 1c8935f..a45d4e4 100644
--- a/pso/__init__.py
+++ b/pso/__init__.py
@@ -1,7 +1,7 @@
 from .optimizer import Optimizer as optimizer
 from .particle import Particle as particle
 
-__version__ = "0.1.9"
+__version__ = "1.0.0"
 
 __all__ = [
     "optimizer",
diff --git a/pso/optimizer.py b/pso/optimizer.py
index 0a5304a..4fde5f6 100644
--- a/pso/optimizer.py
+++ b/pso/optimizer.py
@@ -39,8 +39,12 @@ class Optimizer:
         np_seed: int = None,
         tf_seed: int = None,
         random_state: tuple = None,
-        particle_min: float = -5,
-        particle_max: float = 5,
+        particle_min: float = -0.3,
+        particle_max: float = 0.3,
+        convergence_reset: bool = False,
+        convergence_reset_patience: int = 10,
+        convergence_reset_min_delta: float = 0.0001,
+        convergence_reset_monitor: str = "loss",
     ):
         """
         particle swarm optimization
@@ -59,6 +63,10 @@ class Optimizer:
             tf_seed (int, optional): tensorflow seed. Defaults to None.
             particle_min (float, optional): minimum value for weight initialization. Defaults to -0.3.
             particle_max (float, optional): maximum value for weight initialization. Defaults to 0.3.
+            convergence_reset (bool, optional): whether to reset particles that converge early (early stopping). Defaults to False.
+            convergence_reset_patience (int, optional): how many iterations to wait before resetting. Defaults to 10.
+            convergence_reset_min_delta (float, optional): minimum change that still counts as an improvement. Defaults to 0.0001.
+            convergence_reset_monitor (str, optional): metric used to decide on a reset. Defaults to "loss".
""" if np_seed is not None: np.random.seed(np_seed) @@ -95,36 +103,36 @@ class Optimizer: self.day = datetime.now().strftime("%Y%m%d-%H%M%S") self.empirical_balance = False + negative_count = 0 self.train_summary_writer = [None] * self.n_particles + try: print(f"start running time : {self.day}") for i in tqdm(range(self.n_particles), desc="Initializing Particles"): - model_ = keras.models.model_from_json(model.to_json()) - w_, sh_, len_ = self._encode(model_.get_weights()) - w_ = np.random.uniform(particle_min, particle_max, len(w_)) - model_.set_weights(self._decode(w_, sh_, len_)) - model_.compile( - loss=self.loss, - optimizer="sgd", - metrics=["accuracy"] - ) self.particles[i] = Particle( - model_, - loss, - negative=True if i < negative_swarm * self.n_particles else False, - mutation=mutation_swarm, + model, + self.loss, + negative=True if i < self.negative_swarm * self.n_particles else False, + mutation=self.mutation_swarm, + converge_reset=convergence_reset, + converge_reset_patience=convergence_reset_patience, + converge_reset_monitor=convergence_reset_monitor, + converge_reset_min_delta=convergence_reset_min_delta, ) - if i < negative_swarm * self.n_particles: + + if i < self.negative_swarm * self.n_particles: negative_count += 1 # del m, init_weights, w_, sh_, len_ gc.collect() tf.keras.backend.reset_uids() tf.keras.backend.clear_session() - print(f"negative swarm : {negative_count} / {self.n_particles}") + # del model_ + + print(f"negative swarm : {negative_count} / {n_particles}") print(f"mutation swarm : {mutation_swarm * 100}%") gc.collect() @@ -240,6 +248,7 @@ class Optimizer: self.index += 1 if self.index >= self.max_index: self.index = 0 + self.__getBatchSlice__(self.batch_size) return self.dataset[self.index][0], self.dataset[self.index][1] def getMaxIndex(self): @@ -259,12 +268,15 @@ class Optimizer: if self.batch_size > len(self.x): self.batch_size = len(self.x) print(f"batch size : {self.batch_size}") - self.dataset = list( - tf.data.Dataset.from_tensor_slices( - (self.x, self.y)).batch(batch_size) - ) + self.dataset = self.__getBatchSlice__(self.batch_size) self.max_index = len(self.dataset) + def __getBatchSlice__(self, batch_size): + return list( + tf.data.Dataset.from_tensor_slices( + (self.x, self.y)).shuffle(len(self.x)).batch(batch_size) + ) + def getDataset(self): return self.dataset @@ -281,7 +293,8 @@ class Optimizer: empirical_balance: bool = False, dispersion: bool = False, check_point: int = None, - batch_size: int = 128, + batch_size: int = None, + validate_data: any = None, ): """ # Args: @@ -295,12 +308,35 @@ class Optimizer: empirical_balance : bool - True : EBPSO, False : PSO, dispersion : bool - True : g_best 의 값을 분산시켜 전역해를 찾음, False : g_best 의 값만 사용 check_point : int - 저장할 위치 - None : 저장 안함 - batch_size : int - batch size default : 128 + batch_size : int - batch size default : None => len(x) // 10 + batch_size > len(x) : auto max batch size """ + try: + if x.shape[0] != y.shape[0]: + raise ValueError("x, y shape error") + + if log not in [0, 1, 2]: + raise ValueError("log not in [0, 1, 2]") + + if save_info and save_path is None: + raise ValueError("save_path is None") + + if renewal not in ["acc", "loss", "both"]: + raise ValueError("renewal not in ['acc', 'loss', 'both']") + + if check_point is not None and save_path is None: + raise ValueError("save_path is None") + + except ValueError as ve: + sys.exit(ve) + self.save_path = save_path self.empirical_balance = empirical_balance self.dispersion = dispersion + if batch_size is None: + batch_size = 
+
         self.renewal = renewal
         particle_sum = 0  # x_j
         try:
@@ -326,7 +362,7 @@ class Optimizer:
 
             model_ = keras.models.model_from_json(self.model.to_json())
             model_.compile(loss=self.loss, optimizer="adam", metrics=["accuracy"])
-            model_.fit(x, y, epochs=1, batch_size=64, verbose=0)
+            model_.fit(x, y, epochs=1, verbose=0)
 
             score = model_.evaluate(x, y, verbose=1)
             if renewal == "acc":
diff --git a/pso/particle.py b/pso/particle.py
index 23f53e0..305a8fa 100644
--- a/pso/particle.py
+++ b/pso/particle.py
@@ -15,29 +15,51 @@
     """
 
     def __init__(
-        self, model: keras.models, loss, negative: bool = False, mutation: float = 0
+        self,
+        model: keras.models,
+        loss,
+        negative: bool = False,
+        mutation: float = 0,
+        converge_reset: bool = False,
+        converge_reset_patience: int = 10,
+        converge_reset_monitor: str = "loss",
+        converge_reset_min_delta: float = 0.0001,
     ):
         """
         Args:
             model (keras.models): model used for training and validation
             loss (str): loss function
             negative (bool, optional): whether to use negative weights - used for global exploration (to avoid premature convergence). Defaults to False.
+            mutation (float, optional): mutation probability. Defaults to 0.
+            converge_reset (bool, optional): whether to reset the particle on early convergence. Defaults to False.
+            converge_reset_patience (int, optional): how many iterations to wait before resetting. Defaults to 10.
         """
         self.model = model
         self.loss = loss
-        init_weights = self.model.get_weights()
-        i_w_, i_s, i_l = self._encode(init_weights)
-        i_w_ = np.random.uniform(-0.5, 0.5, len(i_w_))
-        self.velocities = self._decode(i_w_, i_s, i_l)
+
+        try:
+            if converge_reset and converge_reset_monitor not in ["acc", "accuracy", "loss"]:
+                raise ValueError(
+                    "converge_reset_monitor must be 'acc' or 'accuracy' or 'loss'"
+                )
+            if converge_reset and converge_reset_min_delta < 0:
+                raise ValueError("converge_reset_min_delta must be positive")
+            if converge_reset and converge_reset_patience < 0:
+                raise ValueError("converge_reset_patience must be positive")
+        except ValueError as e:
+            print(e)
+            exit(1)
+
+        self.reset_particle()
         self.negative = negative
         self.mutation = mutation
         self.best_score = 0
-        self.best_weights = init_weights
-        self.before_best = init_weights
         self.before_w = 0
-
-        del i_w_, i_s, i_l
-        del init_weights
+        self.score_history = []
+        self.converge_reset = converge_reset
+        self.converge_reset_patience = converge_reset_patience
+        self.converge_reset_monitor = converge_reset_monitor
+        self.converge_reset_min_delta = converge_reset_min_delta
 
     def __del__(self):
         del self.model
@@ -89,6 +111,7 @@ class Particle:
             w_ = np.reshape(w_, shape[i])
             weights.append(w_)
             start = end
+        del start, end, w_
 
         del shape, length
         del weight
@@ -119,6 +142,42 @@ class Particle:
 
         return score
 
+    def __check_converge_reset__(self, score, monitor="loss", patience: int = 10, min_delta: float = 0.0001):
+        """
+        Checks the early-stopping (convergence reset) condition.
+
+        Args:
+            score (list): current score - [0] : loss, [1] : accuracy
+            monitor (str, optional): metric to monitor. Defaults to "loss".
+            patience (int, optional): number of recent iterations to inspect. Defaults to 10.
+            min_delta (float, optional): minimum change that counts as an improvement. Defaults to 0.0001.
+        """
+        if monitor in ["acc", "accuracy"]:
+            self.score_history.append(score[1])
+        elif monitor in ["loss"]:
+            self.score_history.append(score[0])
+
+        if len(self.score_history) > patience:
+            last_scores = self.score_history[-patience:]
+            if max(last_scores) - min(last_scores) < min_delta:
+                return True
+        return False
+
+    def reset_particle(self):
+        self.model = keras.models.model_from_json(self.model.to_json())
+        self.model.compile(optimizer="adam", loss=self.loss,
+                           metrics=["accuracy"])
+        init_weights = self.model.get_weights()
+        i_w_, i_s, i_l = self._encode(init_weights)
+        i_w_ = np.random.uniform(-0.05, 0.05, len(i_w_))
+        self.velocities = self._decode(i_w_, i_s, i_l)
+
+        self.best_weights = init_weights
+        self.before_best = init_weights
+
+        del init_weights, i_w_, i_s, i_l
+        self.score_history = []
+
     def _update_velocity(self, local_rate, global_rate, w, g_best):
         """
         Update the current velocity.
@@ -140,7 +199,7 @@ class Particle:
             r_1 = np.random.rand()
 
             if not np.array_equal(encode_before, encode_g, equal_nan=True):
-                self.before_w = w * 0.6
+                self.before_w = w * 0.5
                 w = w + self.before_w
             else:
                 self.before_w *= 0.75
@@ -152,6 +211,9 @@ class Particle:
                     + local_rate * r_0 * (encode_p - encode_w)
                     + -1 * global_rate * r_1 * (encode_g - encode_w)
                 )
+                if len(self.score_history) > 10 and max(self.score_history[-10:]) - min(self.score_history[-10:]) < 0.01:
+                    self.reset_particle()
+
             else:
                 new_v = (
                     w * encode_v
@@ -160,7 +222,7 @@ class Particle:
                 )
 
             if np.random.rand() < self.mutation:
-                m_v = np.random.uniform(-0.2, 0.2, len(encode_v))
+                m_v = np.random.uniform(-0.1, 0.1, len(encode_v))
                 new_v = m_v
 
             self.velocities = self._decode(new_v, w_sh, w_len)
@@ -196,7 +258,7 @@ class Particle:
             r_1 = np.random.rand()
 
             if not np.array_equal(encode_before, encode_g, equal_nan=True):
-                self.before_w = w * 0.6
+                self.before_w = w * 0.5
                 w = w + self.before_w
             else:
                 self.before_w *= 0.75
@@ -258,7 +320,13 @@ class Particle:
         self._update_velocity(local_rate, global_rate, w, g_best)
         self._update_weights()
 
-        return self.get_score(x, y, renewal)
+        score = self.get_score(x, y, renewal)
+
+        if self.converge_reset and self.__check_converge_reset__(
+            score, self.converge_reset_monitor, self.converge_reset_patience, self.converge_reset_min_delta):
+            self.reset_particle()
+
+        return score
 
     def step_w(
         self, x, y, local_rate, global_rate, w, g_best, w_p, w_g, renewal: str = "acc"
diff --git a/setup.py b/setup.py
index acd0937..c7dd8b7 100644
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,6 @@ setup(
     classifiers=[
         "License :: OSI Approved :: MIT License",
         "Programming Language :: Python :: 3 :: Only",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",