#include<bits/stdc++.h>
using namespace std;
// Use standard int types for speed, avoid global #define int long long
using ll = long long;
const int MOD = 998244353;
const int MAX_T = 60; // Max unique characters (2*N + buffer)
// Static arrays for cache locality (Row-Major Order)
int dp1[12][MAX_T][MAX_T];
int dp2[MAX_T][MAX_T][MAX_T];
int vis[256];
int pos[256];
int main(){
// Fast I/O
ios::sync_with_stdio(0); cin.tie(0); cout.tie(0);
int n;
if(!(cin >> n)) return 0;
vector<string> s(2*n);
for(int i=0; i<n; i++){
cin >> s[i];
s[i+n] = s[i];
reverse(s[i+n].begin(), s[i+n].end());
for(char c : s[i]) vis[(unsigned char)c] = 1;
}
// Preprocessing: Unique strings and character mapping
sort(s.begin(), s.end());
s.erase(unique(s.begin(), s.end()), s.end());
n = s.size();
int timer = 0;
for(int i=0; i<256; i++){
if(vis[i]){
pos[i] = ++timer;
}
}
// Initialize DP1
// dp1[len][start][end]
for(int i=0; i<n; i++){
int len = s[i].size();
if(len <= 10) {
dp1[len][pos[(unsigned char)s[i][0]]][pos[(unsigned char)s[i].back()]]++;
}
}
ll ans = 0;
// Main Logic
for(int len=3; len<=10; len++){
// --- Step 1: Compute dp2 ---
// dp2[i][j][k] = sum(dp1[x][i] * dp1[x][j] * dp1[x][k])
// Optimization: Intermediate sum fits in long long, mod only at the end.
for(int i=1; i<=timer; i++){
for(int j=1; j<=timer; j++){
for(int k=1; k<=timer; k++){
ll current_sum = 0;
for(int x=1; x<=timer; x++){
// CPU Branch prediction handles the 0 check or just raw mult is fast enough
// Raw integer multiplication is much faster than modular mult
ll v = (ll)dp1[len][x][i] * dp1[len][x][j];
if(v) current_sum += v * dp1[len][x][k];
}
dp2[i][j][k] = current_sum % MOD;
}
}
}
// --- Step 2: Compute Answer ---
// Formula: dp2[i][j][k] * dp2[x][i][k] * dp2[x][i][j] * dp2[x][j][k]
// Symmetry: dp2[a][b][c] is symmetric. We can swap indices to match memory layout.
// We want the inner loop variable 'x' to be the LAST index: dp2[...][x]
for(int i=1; i<=timer; i++){
for(int j=1; j<=timer; j++){
for(int k=1; k<=timer; k++){
ll val_ijk = dp2[i][j][k];
if(val_ijk == 0) continue; // Optimization: Pruning
ll sum_x = 0;
for(int x=1; x<=timer; x++){
// Access as [fixed][fixed][varying] for contiguous memory
// We need dp2[x][i][k] -> use dp2[i][k][x]
// We need dp2[x][i][j] -> use dp2[i][j][x]
// We need dp2[x][j][k] -> use dp2[j][k][x]
ll term = (ll)dp2[i][k][x] * dp2[i][j][x] % MOD;
// Delayed modulo for the summation usually risks overflow,
// but term * term < ~4e18? No, max mod is 1e9, product is 1e18.
// We must mod after multiplication.
term = term * dp2[j][k][x] % MOD;
sum_x += term;
}
ans = (ans + val_ijk * (sum_x % MOD)) % MOD;
}
}
}
}
cout << ans << '\n';
}
| # | Verdict | Execution time | Memory | Grader output |
|---|
| Fetching results... |
| # | Verdict | Execution time | Memory | Grader output |
|---|
| Fetching results... |
| # | Verdict | Execution time | Memory | Grader output |
|---|
| Fetching results... |
| # | Verdict | Execution time | Memory | Grader output |
|---|
| Fetching results... |