제출 #1303012

#제출 시각아이디문제언어결과실행 시간메모리
1303012liangjeremyCubeword (CEOI19_cubeword)C++20
컴파일 에러
0 ms0 KiB
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2,bmi,bmi2,lzcnt,popcnt")

#include<bits/stdc++.h>
using namespace std;

// Keep the macro for your logic, but we will use int32_t for performance critical parts
#define int long long 

// Standard definitions
using ll=int64_t;

// Optimized Modular Integer
// Removed internal long long storage to save cache bandwidth
template<int mod>
struct mint {
    int32_t v; // Use 32-bit integer for storage
    mint():v(0){}
    mint(ll _v):v((int32_t)(_v%mod)){ if(v<0) v+=mod; }
    
    mint& operator+=(const mint& o){
        if((v+=o.v)>=mod)v-=mod;
        return *this;
    }
    mint& operator-=(const mint& o){
        if((v-=o.v)<0)v+=mod;
        return *this; 
    }
    mint& operator*=(const mint& o){
        v = (int32_t)((ll)v*o.v%mod); // Cast to long long only for multiplication
        return *this; 
    }
    
    friend mint operator+(mint a, const mint& b){ return a+=b; }
    friend mint operator*(mint a, const mint& b){ return a*=b; }
}; 

using mi = mint<998244353>;

// GLOBAL STATIC ARRAYS
// Replacing vectors with fixed size arrays for contiguous memory
// 205 is sufficient based on your code (vis is size 200)
const int MAXT = 205;
mi dp1[11][MAXT][MAXT];
mi dp2[MAXT][MAXT][MAXT];

int32_t main(){
    ios::sync_with_stdio(0); cin.tie(0); cout.tie(0);
    
    int n; 
    if(!(cin >> n)) return 0;
    
    vector<string> s(2*n); 
    vector<int> vis(256, 0); // Increased slightly for safety
    
    for(int i=0; i<n; i++){
        cin >> s[i]; 
        s[i+n] = s[i]; 
        reverse(s[i+n].begin(), s[i+n].end()); 
        for(auto x:s[i]){ vis[x]=1; }
    }
    
    sort(s.begin(), s.end()); 
    s.erase(unique(s.begin(), s.end()), s.end()); 
    n = s.size(); 
    
    int timer = 0; 
    vector<int> pos(256); 
    for(int i=0; i<256; i++){
        if(vis[i]){
            timer++; pos[i]=timer; 
        }
    }
    
    // Fill dp1
    for(int i=0; i<n; i++){
        if(s[i].size() <= 10) {
            dp1[s[i].size()][pos[s[i][0]]][pos[s[i].back()]] += 1; 
        }
    }   
    
    mi ans = 0; 
    
    // Main DP Logic
    for(int len=3; len<=10; len++){
        
        // Reset dp2 for this length
        // Since it's global, we must manually clear it or overwrite it
        // Note: Using int32_t for loops is faster
        for(int32_t i=1; i<=timer; i++){
            for(int32_t j=1; j<=timer; j++){
                for(int32_t k=1; k<=timer; k++){
                    mi val = 0;
                    // Inner loop logic optimized:
                    // dp1 access is linear over x for the 2nd and 3rd term if we organized it differently,
                    // but static array is fast enough here.
                    for(int32_t x=1; x<=timer; x++){
                        val += dp1[len][x][i] * dp1[len][x][j] * dp1[len][x][k]; 
                    }
                    dp2[i][j][k] = val;
                }
            }
        }
        
        // CACHE OPTIMIZATION: Moved 'x' to the OUTERMOST loop
        // Original was i,j,k,x. Accessing dp2[x][...] inside inner loop jumps huge memory addresses.
        // With x outer, dp2[x][i][k] becomes constant for the inner loops over j.
        for(int32_t x=1; x<=timer; x++){
            for(int32_t i=1; i<=timer; i++){
                for(int32_t k=1; k<=timer; k++){
                     // Precompute/fetch constant parts for the inner j loop
                     mi term_xk = dp2[x][i][k];
                     
                     for(int32_t j=1; j<=timer; j++){
                        // dp2[i][j][k] is standard access
                        // dp2[x][i][k] is constant in this loop
                        // dp2[x][i][j] is linear access
                        // dp2[x][j][k] is scattered, but better than before
                        ans += dp2[i][j][k] * term_xk * dp2[x][i][j] * dp2[x][j][k]; 
                    }
                }
            }
        }
    }
    
    cout << (int)ans.v << '\n'; 
}

컴파일 시 표준 에러 (stderr) 메시지

In file included from /usr/include/c++/13/string:43,
                 from /usr/include/c++/13/bitset:52,
                 from /usr/include/x86_64-linux-gnu/c++/13/bits/stdc++.h:52,
                 from cubeword.cpp:4:
/usr/include/c++/13/bits/allocator.h: In destructor 'constexpr std::_Vector_base<long long int, std::allocator<long long int> >::_Vector_impl::~_Vector_impl()':
/usr/include/c++/13/bits/allocator.h:184:7: error: inlining failed in call to 'always_inline' 'constexpr std::allocator< <template-parameter-1-1> >::~allocator() noexcept [with _Tp = long long int]': target specific option mismatch
  184 |       ~allocator() _GLIBCXX_NOTHROW { }
      |       ^
In file included from /usr/include/c++/13/vector:66,
                 from /usr/include/c++/13/functional:64,
                 from /usr/include/x86_64-linux-gnu/c++/13/bits/stdc++.h:53:
/usr/include/c++/13/bits/stl_vector.h:133:14: note: called from here
  133 |       struct _Vector_impl
      |              ^~~~~~~~~~~~