@@ -550,7 +550,7 @@ class CLIPEmbeddings : public GGMLBlock {
550550 int64_t num_positions;
551551 bool force_clip_f32;
552552
553- void init_params (struct ggml_context * ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = " " ) {
553+ void init_params (struct ggml_context * ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = " " ) override {
554554 enum ggml_type token_wtype = GGML_TYPE_F32;
555555 if (!force_clip_f32) {
556556 token_wtype = get_type (prefix + " token_embedding.weight" , tensor_types, GGML_TYPE_F32);
@@ -587,7 +587,7 @@ class CLIPEmbeddings : public GGMLBlock {
587587
588588 GGML_ASSERT (input_ids->ne [0 ] == position_embed_weight->ne [1 ]);
589589 input_ids = ggml_reshape_3d (ctx, input_ids, input_ids->ne [0 ], 1 , input_ids->ne [1 ]);
590- auto token_embedding = ggml_get_rows (ctx, custom_embed_weight != NULL ? custom_embed_weight : token_embed_weight, input_ids);
590+ auto token_embedding = ggml_get_rows (ctx, custom_embed_weight != nullptr ? custom_embed_weight : token_embed_weight, input_ids);
591591 token_embedding = ggml_reshape_3d (ctx, token_embedding, token_embedding->ne [0 ], token_embedding->ne [1 ], token_embedding->ne [3 ]);
592592
593593 // token_embedding + position_embedding
@@ -606,7 +606,7 @@ class CLIPVisionEmbeddings : public GGMLBlock {
606606 int64_t image_size;
607607 int64_t num_patches;
608608 int64_t num_positions;
609- void init_params (struct ggml_context * ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = " " ) {
609+ void init_params (struct ggml_context * ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = " " ) override {
610610 enum ggml_type patch_wtype = GGML_TYPE_F16;
611611 enum ggml_type class_wtype = GGML_TYPE_F32;
612612 enum ggml_type position_wtype = GGML_TYPE_F32;
@@ -641,10 +641,10 @@ class CLIPVisionEmbeddings : public GGMLBlock {
641641 // concat(patch_embedding, class_embedding) + position_embedding
642642 struct ggml_tensor * patch_embedding;
643643 int64_t N = pixel_values->ne [3 ];
644- patch_embedding = ggml_nn_conv_2d (ctx, pixel_values, patch_embed_weight, NULL , patch_size, patch_size); // [N, embed_dim, image_size // pacht_size, image_size // pacht_size]
645- patch_embedding = ggml_reshape_3d (ctx, patch_embedding, num_patches, embed_dim, N); // [N, embed_dim, num_patches]
646- patch_embedding = ggml_cont (ctx, ggml_permute (ctx, patch_embedding, 1 , 0 , 2 , 3 )); // [N, num_patches, embed_dim]
647- patch_embedding = ggml_reshape_4d (ctx, patch_embedding, 1 , embed_dim, num_patches, N); // [N, num_patches, embed_dim, 1]
644+ patch_embedding = ggml_nn_conv_2d (ctx, pixel_values, patch_embed_weight, nullptr , patch_size, patch_size); // [N, embed_dim, image_size // patch_size, image_size // patch_size]
645+ patch_embedding = ggml_reshape_3d (ctx, patch_embedding, num_patches, embed_dim, N); // [N, embed_dim, num_patches]
646+ patch_embedding = ggml_cont (ctx, ggml_permute (ctx, patch_embedding, 1 , 0 , 2 , 3 )); // [N, num_patches, embed_dim]
647+ patch_embedding = ggml_reshape_4d (ctx, patch_embedding, 1 , embed_dim, num_patches, N); // [N, num_patches, embed_dim, 1]
648648
649649 struct ggml_tensor * class_embedding = ggml_new_tensor_2d (ctx, GGML_TYPE_F32, embed_dim, N);
650650 class_embedding = ggml_repeat (ctx, class_embed_weight, class_embedding); // [N, embed_dim]
@@ -669,7 +669,7 @@ enum CLIPVersion {
669669
670670class CLIPTextModel : public GGMLBlock {
671671protected:
672- void init_params (struct ggml_context * ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = " " ) {
672+ void init_params (struct ggml_context * ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = " " ) override {
673673 if (version == OPEN_CLIP_VIT_BIGG_14) {
674674 enum ggml_type wtype = GGML_TYPE_F32;
675675 params[" text_projection" ] = ggml_new_tensor_2d (ctx, wtype, projection_dim, hidden_size);
@@ -735,8 +735,8 @@ class CLIPTextModel : public GGMLBlock {
735735 if (return_pooled) {
736736 auto text_projection = params[" text_projection" ];
737737 ggml_tensor* pooled = ggml_view_1d (ctx, x, hidden_size, x->nb [1 ] * max_token_idx);
738- if (text_projection != NULL ) {
739- pooled = ggml_nn_linear (ctx, pooled, text_projection, NULL );
738+ if (text_projection != nullptr ) {
739+ pooled = ggml_nn_linear (ctx, pooled, text_projection, nullptr );
740740 } else {
741741 LOG_DEBUG (" identity projection" );
742742 }
@@ -814,7 +814,7 @@ class CLIPProjection : public UnaryBlock {
814814 int64_t out_features;
815815 bool transpose_weight;
816816
817- void init_params (struct ggml_context * ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = " " ) {
817+ void init_params (struct ggml_context * ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = " " ) override {
818818 enum ggml_type wtype = get_type (prefix + " weight" , tensor_types, GGML_TYPE_F32);
819819 if (transpose_weight) {
820820 params[" weight" ] = ggml_new_tensor_2d (ctx, wtype, out_features, in_features);
@@ -831,12 +831,12 @@ class CLIPProjection : public UnaryBlock {
831831 out_features (out_features),
832832 transpose_weight(transpose_weight) {}
833833
834- struct ggml_tensor * forward (struct ggml_context * ctx, struct ggml_tensor * x) {
834+ struct ggml_tensor * forward (struct ggml_context * ctx, struct ggml_tensor * x) override {
835835 struct ggml_tensor * w = params[" weight" ];
836836 if (transpose_weight) {
837837 w = ggml_cont (ctx, ggml_transpose (ctx, w));
838838 }
839- return ggml_nn_linear (ctx, x, w, NULL );
839+ return ggml_nn_linear (ctx, x, w, nullptr );
840840 }
841841};
842842
@@ -894,7 +894,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
894894 model.init (params_ctx, tensor_types, prefix);
895895 }
896896
897- std::string get_desc () {
897+ std::string get_desc () override {
898898 return " clip" ;
899899 }
900900
@@ -921,17 +921,17 @@ struct CLIPTextModelRunner : public GGMLRunner {
921921
922922 struct ggml_cgraph * build_graph (struct ggml_tensor * input_ids,
923923 int num_custom_embeddings = 0 ,
924- void * custom_embeddings_data = NULL ,
924+ void * custom_embeddings_data = nullptr ,
925925 size_t max_token_idx = 0 ,
926926 bool return_pooled = false ,
927927 int clip_skip = -1 ) {
928928 struct ggml_cgraph * gf = ggml_new_graph (compute_ctx);
929929
930930 input_ids = to_backend (input_ids);
931931
932- struct ggml_tensor * embeddings = NULL ;
932+ struct ggml_tensor * embeddings = nullptr ;
933933
934- if (num_custom_embeddings > 0 && custom_embeddings_data != NULL ) {
934+ if (num_custom_embeddings > 0 && custom_embeddings_data != nullptr ) {
935935 auto token_embed_weight = model.get_token_embed_weight ();
936936 auto custom_embeddings = ggml_new_tensor_2d (compute_ctx,
937937 token_embed_weight->type ,
@@ -958,7 +958,7 @@ struct CLIPTextModelRunner : public GGMLRunner {
958958 bool return_pooled,
959959 int clip_skip,
960960 ggml_tensor** output,
961- ggml_context* output_ctx = NULL ) {
961+ ggml_context* output_ctx = nullptr ) {
962962 auto get_graph = [&]() -> struct ggml_cgraph * {
963963 return build_graph(input_ids, num_custom_embeddings, custom_embeddings_data, max_token_idx, return_pooled, clip_skip);
964964 };
0 commit comments