Submission #1303011

#TimeUsernameProblemLanguageResultExecution timeMemory
1303011liangjeremyCubeword (CEOI19_cubeword)C++20
84 / 100
387 ms7472 KiB
#include<bits/stdc++.h> using namespace std; // Use standard int types for speed, avoid global #define int long long using ll = long long; const int MOD = 998244353; const int MAX_T = 60; // Max unique characters (2*N + buffer) // Static arrays for cache locality (Row-Major Order) int dp1[12][MAX_T][MAX_T]; int dp2[MAX_T][MAX_T][MAX_T]; int vis[256]; int pos[256]; int main(){ // Fast I/O ios::sync_with_stdio(0); cin.tie(0); cout.tie(0); int n; if(!(cin >> n)) return 0; vector<string> s(2*n); for(int i=0; i<n; i++){ cin >> s[i]; s[i+n] = s[i]; reverse(s[i+n].begin(), s[i+n].end()); for(char c : s[i]) vis[(unsigned char)c] = 1; } // Preprocessing: Unique strings and character mapping sort(s.begin(), s.end()); s.erase(unique(s.begin(), s.end()), s.end()); n = s.size(); int timer = 0; for(int i=0; i<256; i++){ if(vis[i]){ pos[i] = ++timer; } } // Initialize DP1 // dp1[len][start][end] for(int i=0; i<n; i++){ int len = s[i].size(); if(len <= 10) { dp1[len][pos[(unsigned char)s[i][0]]][pos[(unsigned char)s[i].back()]]++; } } ll ans = 0; // Main Logic for(int len=3; len<=10; len++){ // --- Step 1: Compute dp2 --- // dp2[i][j][k] = sum(dp1[x][i] * dp1[x][j] * dp1[x][k]) // Optimization: Intermediate sum fits in long long, mod only at the end. for(int i=1; i<=timer; i++){ for(int j=1; j<=timer; j++){ for(int k=1; k<=timer; k++){ ll current_sum = 0; for(int x=1; x<=timer; x++){ // CPU Branch prediction handles the 0 check or just raw mult is fast enough // Raw integer multiplication is much faster than modular mult ll v = (ll)dp1[len][x][i] * dp1[len][x][j]; if(v) current_sum += v * dp1[len][x][k]; } dp2[i][j][k] = current_sum % MOD; } } } // --- Step 2: Compute Answer --- // Formula: dp2[i][j][k] * dp2[x][i][k] * dp2[x][i][j] * dp2[x][j][k] // Symmetry: dp2[a][b][c] is symmetric. We can swap indices to match memory layout. // We want the inner loop variable 'x' to be the LAST index: dp2[...][x] for(int i=1; i<=timer; i++){ for(int j=1; j<=timer; j++){ for(int k=1; k<=timer; k++){ ll val_ijk = dp2[i][j][k]; if(val_ijk == 0) continue; // Optimization: Pruning ll sum_x = 0; for(int x=1; x<=timer; x++){ // Access as [fixed][fixed][varying] for contiguous memory // We need dp2[x][i][k] -> use dp2[i][k][x] // We need dp2[x][i][j] -> use dp2[i][j][x] // We need dp2[x][j][k] -> use dp2[j][k][x] ll term = (ll)dp2[i][k][x] * dp2[i][j][x] % MOD; // Delayed modulo for the summation usually risks overflow, // but term * term < ~4e18? No, max mod is 1e9, product is 1e18. // We must mod after multiplication. term = term * dp2[j][k][x] % MOD; sum_x += term; } ans = (ans + val_ijk * (sum_x % MOD)) % MOD; } } } } cout << ans << '\n'; }
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...