Submission #1303012

#TimeUsernameProblemLanguageResultExecution timeMemory
1303012liangjeremyCubeword (CEOI19_cubeword)C++20
Compilation error
0 ms0 KiB
#pragma GCC optimize("O3,unroll-loops") #pragma GCC target("avx2,bmi,bmi2,lzcnt,popcnt") #include<bits/stdc++.h> using namespace std; // Keep the macro for your logic, but we will use int32_t for performance critical parts #define int long long // Standard definitions using ll=int64_t; // Optimized Modular Integer // Removed internal long long storage to save cache bandwidth template<int mod> struct mint { int32_t v; // Use 32-bit integer for storage mint():v(0){} mint(ll _v):v((int32_t)(_v%mod)){ if(v<0) v+=mod; } mint& operator+=(const mint& o){ if((v+=o.v)>=mod)v-=mod; return *this; } mint& operator-=(const mint& o){ if((v-=o.v)<0)v+=mod; return *this; } mint& operator*=(const mint& o){ v = (int32_t)((ll)v*o.v%mod); // Cast to long long only for multiplication return *this; } friend mint operator+(mint a, const mint& b){ return a+=b; } friend mint operator*(mint a, const mint& b){ return a*=b; } }; using mi = mint<998244353>; // GLOBAL STATIC ARRAYS // Replacing vectors with fixed size arrays for contiguous memory // 205 is sufficient based on your code (vis is size 200) const int MAXT = 205; mi dp1[11][MAXT][MAXT]; mi dp2[MAXT][MAXT][MAXT]; int32_t main(){ ios::sync_with_stdio(0); cin.tie(0); cout.tie(0); int n; if(!(cin >> n)) return 0; vector<string> s(2*n); vector<int> vis(256, 0); // Increased slightly for safety for(int i=0; i<n; i++){ cin >> s[i]; s[i+n] = s[i]; reverse(s[i+n].begin(), s[i+n].end()); for(auto x:s[i]){ vis[x]=1; } } sort(s.begin(), s.end()); s.erase(unique(s.begin(), s.end()), s.end()); n = s.size(); int timer = 0; vector<int> pos(256); for(int i=0; i<256; i++){ if(vis[i]){ timer++; pos[i]=timer; } } // Fill dp1 for(int i=0; i<n; i++){ if(s[i].size() <= 10) { dp1[s[i].size()][pos[s[i][0]]][pos[s[i].back()]] += 1; } } mi ans = 0; // Main DP Logic for(int len=3; len<=10; len++){ // Reset dp2 for this length // Since it's global, we must manually clear it or overwrite it // Note: Using int32_t for loops is faster for(int32_t i=1; i<=timer; i++){ for(int32_t j=1; j<=timer; j++){ for(int32_t k=1; k<=timer; k++){ mi val = 0; // Inner loop logic optimized: // dp1 access is linear over x for the 2nd and 3rd term if we organized it differently, // but static array is fast enough here. for(int32_t x=1; x<=timer; x++){ val += dp1[len][x][i] * dp1[len][x][j] * dp1[len][x][k]; } dp2[i][j][k] = val; } } } // CACHE OPTIMIZATION: Moved 'x' to the OUTERMOST loop // Original was i,j,k,x. Accessing dp2[x][...] inside inner loop jumps huge memory addresses. // With x outer, dp2[x][i][k] becomes constant for the inner loops over j. for(int32_t x=1; x<=timer; x++){ for(int32_t i=1; i<=timer; i++){ for(int32_t k=1; k<=timer; k++){ // Precompute/fetch constant parts for the inner j loop mi term_xk = dp2[x][i][k]; for(int32_t j=1; j<=timer; j++){ // dp2[i][j][k] is standard access // dp2[x][i][k] is constant in this loop // dp2[x][i][j] is linear access // dp2[x][j][k] is scattered, but better than before ans += dp2[i][j][k] * term_xk * dp2[x][i][j] * dp2[x][j][k]; } } } } } cout << (int)ans.v << '\n'; }

Compilation message (stderr)

In file included from /usr/include/c++/13/string:43,
                 from /usr/include/c++/13/bitset:52,
                 from /usr/include/x86_64-linux-gnu/c++/13/bits/stdc++.h:52,
                 from cubeword.cpp:4:
/usr/include/c++/13/bits/allocator.h: In destructor 'constexpr std::_Vector_base<long long int, std::allocator<long long int> >::_Vector_impl::~_Vector_impl()':
/usr/include/c++/13/bits/allocator.h:184:7: error: inlining failed in call to 'always_inline' 'constexpr std::allocator< <template-parameter-1-1> >::~allocator() noexcept [with _Tp = long long int]': target specific option mismatch
  184 |       ~allocator() _GLIBCXX_NOTHROW { }
      |       ^
In file included from /usr/include/c++/13/vector:66,
                 from /usr/include/c++/13/functional:64,
                 from /usr/include/x86_64-linux-gnu/c++/13/bits/stdc++.h:53:
/usr/include/c++/13/bits/stl_vector.h:133:14: note: called from here
  133 |       struct _Vector_impl
      |              ^~~~~~~~~~~~