v3.14.7 (#10) by JarrettYe - opened
- app.py +2 -2
- utilities.py +16 -7
app.py
CHANGED
@@ -15,7 +15,7 @@ def get_w_markdown(w):
|
|
15 |
# Updated Parameters
|
16 |
Copy and paste these as shown in step 5 of the instructions:
|
17 |
|
18 |
-
`
|
19 |
|
20 |
Check out the Analysis tab for more detailed information."""
|
21 |
|
@@ -59,7 +59,7 @@ def anki_optimizer(file, timezone, next_day_starts_at, revlog_start_date, reques
|
|
59 |
|
60 |
|
61 |
description = """
|
62 |
-
# FSRS4Anki Optimizer App - v3.
|
63 |
Based on the [tutorial](https://medium.com/@JarrettYe/how-to-use-the-next-generation-spaced-repetition-algorithm-fsrs-on-anki-5a591ca562e2)
|
64 |
of [Jarrett Ye](https://github.com/L-M-Sherlock). This application can give you personalized anki parameters without having to code.
|
65 |
|
|
|
15 |
# Updated Parameters
|
16 |
Copy and paste these as shown in step 5 of the instructions:
|
17 |
|
18 |
+
`{w}`
|
19 |
|
20 |
Check out the Analysis tab for more detailed information."""
|
21 |
|
|
|
59 |
|
60 |
|
61 |
description = """
|
62 |
+
# FSRS4Anki Optimizer App - v3.14.7
|
63 |
Based on the [tutorial](https://medium.com/@JarrettYe/how-to-use-the-next-generation-spaced-repetition-algorithm-fsrs-on-anki-5a591ca562e2)
|
64 |
of [Jarrett Ye](https://github.com/L-M-Sherlock). This application can give you personalized anki parameters without having to code.
|
65 |
|
utilities.py
CHANGED
@@ -64,9 +64,9 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
|
|
64 |
time_sequence = np.array(df['time'])
|
65 |
df.to_csv(proj_dir / "revlog.csv", index=False)
|
66 |
# print("revlog.csv saved.")
|
67 |
-
df = df[
|
68 |
df['real_days'] = df['review_date'] - timedelta(hours=next_day_starts_at)
|
69 |
-
df['real_days'] = pd.DatetimeIndex(df['real_days'].dt.floor('D')).to_julian_date()
|
70 |
df.drop_duplicates(['cid', 'real_days'], keep='first', inplace=True)
|
71 |
df['delta_t'] = df.real_days.diff()
|
72 |
df.dropna(inplace=True)
|
@@ -78,8 +78,14 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
|
|
78 |
|
79 |
# code from https://github.com/L-M-Sherlock/anki_revlog_analysis/blob/main/revlog_analysis.py
|
80 |
def get_feature(x):
|
|
|
81 |
for idx, log in enumerate(x.itertuples()):
|
|
|
|
|
|
|
82 |
if idx == 0:
|
|
|
|
|
83 |
x.iloc[idx, col_idx['delta_t']] = 0
|
84 |
if idx == x.shape[0] - 1:
|
85 |
break
|
@@ -90,7 +96,7 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
|
|
90 |
return x
|
91 |
|
92 |
tqdm.pandas(desc='Saving Trainset')
|
93 |
-
df = df.groupby('cid', as_index=False).progress_apply(get_feature)
|
94 |
df = df[df['id'] >= time.mktime(datetime.strptime(revlog_start_date, "%Y-%m-%d").timetuple()) * 1000]
|
95 |
df["t_history"] = df["t_history"].map(lambda x: x[1:] if len(x) > 1 else x)
|
96 |
df["r_history"] = df["r_history"].map(lambda x: x[1:] if len(x) > 1 else x)
|
@@ -108,16 +114,19 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
|
|
108 |
df = df.drop(columns=['id', 'cid', 'usn', 'ivl', 'last_lvl', 'factor', 'time', 'type', 'create_date', 'review_date',
|
109 |
'real_days', 'r', 't_history'])
|
110 |
df.drop_duplicates(inplace=True)
|
111 |
-
df
|
112 |
|
113 |
def cal_stability(group: pd.DataFrame) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
114 |
if group['i'].values[0] > 1:
|
115 |
r_ivl_cnt = sum(group['delta_t'] * group['retention'].map(np.log) * pow(group['total_cnt'], 2))
|
116 |
ivl_ivl_cnt = sum(group['delta_t'].map(lambda x: x ** 2) * pow(group['total_cnt'], 2))
|
117 |
group['stability'] = round(np.log(0.9) / (r_ivl_cnt / ivl_ivl_cnt), 1)
|
118 |
else:
|
119 |
group['stability'] = 0.0
|
120 |
-
group['group_cnt'] = sum(group['total_cnt'])
|
121 |
group['avg_retention'] = round(
|
122 |
sum(group['retention'] * pow(group['total_cnt'], 2)) / sum(pow(group['total_cnt'], 2)), 3)
|
123 |
group['avg_interval'] = round(
|
@@ -128,7 +137,7 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
|
|
128 |
return group
|
129 |
|
130 |
tqdm.pandas(desc='Calculating Stability')
|
131 |
-
df = df.groupby(by=['r_history']).progress_apply(cal_stability)
|
132 |
# print("Stability calculated.")
|
133 |
df.reset_index(drop=True, inplace=True)
|
134 |
df.drop_duplicates(inplace=True)
|
@@ -143,7 +152,7 @@ def create_time_series_features(revlog_start_date, timezone, next_day_starts_at,
|
|
143 |
df['factor'] = round(df['stability'] / df['last_stability'], 2)
|
144 |
df = df[(df['i'] >= 2) & (df['group_cnt'] >= 100)]
|
145 |
df['last_recall'] = df['r_history'].map(lambda x: x[-1])
|
146 |
-
df = df[df.groupby(['i', 'r_history'])['group_cnt'].transform(max) == df['group_cnt']]
|
147 |
df.to_csv(proj_dir / 'stability_for_analysis.tsv', sep='\t', index=None)
|
148 |
# print("1:again, 2:hard, 3:good, 4:easy\n")
|
149 |
# print(df[df['r_history'].str.contains(r'^[1-4][^124]*$', regex=True)][
|
|
|
64 |
time_sequence = np.array(df['time'])
|
65 |
df.to_csv(proj_dir / "revlog.csv", index=False)
|
66 |
# print("revlog.csv saved.")
|
67 |
+
df = df[df['type'] != 3].copy()
|
68 |
df['real_days'] = df['review_date'] - timedelta(hours=next_day_starts_at)
|
69 |
+
df['real_days'] = pd.DatetimeIndex(df['real_days'].dt.floor('D', ambiguous='infer', nonexistent='shift_forward')).to_julian_date()
|
70 |
df.drop_duplicates(['cid', 'real_days'], keep='first', inplace=True)
|
71 |
df['delta_t'] = df.real_days.diff()
|
72 |
df.dropna(inplace=True)
|
|
|
78 |
|
79 |
# code from https://github.com/L-M-Sherlock/anki_revlog_analysis/blob/main/revlog_analysis.py
|
80 |
def get_feature(x):
|
81 |
+
last_kind = None
|
82 |
for idx, log in enumerate(x.itertuples()):
|
83 |
+
if last_kind is not None and last_kind in (1, 2) and log.type == 0:
|
84 |
+
return x.iloc[:idx]
|
85 |
+
last_kind = log.type
|
86 |
if idx == 0:
|
87 |
+
if log.type != 0:
|
88 |
+
return x.iloc[:idx]
|
89 |
x.iloc[idx, col_idx['delta_t']] = 0
|
90 |
if idx == x.shape[0] - 1:
|
91 |
break
|
|
|
96 |
return x
|
97 |
|
98 |
tqdm.pandas(desc='Saving Trainset')
|
99 |
+
df = df.groupby('cid', as_index=False, group_keys=False).progress_apply(get_feature)
|
100 |
df = df[df['id'] >= time.mktime(datetime.strptime(revlog_start_date, "%Y-%m-%d").timetuple()) * 1000]
|
101 |
df["t_history"] = df["t_history"].map(lambda x: x[1:] if len(x) > 1 else x)
|
102 |
df["r_history"] = df["r_history"].map(lambda x: x[1:] if len(x) > 1 else x)
|
|
|
114 |
df = df.drop(columns=['id', 'cid', 'usn', 'ivl', 'last_lvl', 'factor', 'time', 'type', 'create_date', 'review_date',
|
115 |
'real_days', 'r', 't_history'])
|
116 |
df.drop_duplicates(inplace=True)
|
117 |
+
df['retention'] = df['retention'].map(lambda x: max(min(0.99, x), 0.01))
|
118 |
|
119 |
def cal_stability(group: pd.DataFrame) -> pd.DataFrame:
|
120 |
+
group_cnt = sum(group['total_cnt'])
|
121 |
+
if group_cnt < 10:
|
122 |
+
return pd.DataFrame()
|
123 |
+
group['group_cnt'] = group_cnt
|
124 |
if group['i'].values[0] > 1:
|
125 |
r_ivl_cnt = sum(group['delta_t'] * group['retention'].map(np.log) * pow(group['total_cnt'], 2))
|
126 |
ivl_ivl_cnt = sum(group['delta_t'].map(lambda x: x ** 2) * pow(group['total_cnt'], 2))
|
127 |
group['stability'] = round(np.log(0.9) / (r_ivl_cnt / ivl_ivl_cnt), 1)
|
128 |
else:
|
129 |
group['stability'] = 0.0
|
|
|
130 |
group['avg_retention'] = round(
|
131 |
sum(group['retention'] * pow(group['total_cnt'], 2)) / sum(pow(group['total_cnt'], 2)), 3)
|
132 |
group['avg_interval'] = round(
|
|
|
137 |
return group
|
138 |
|
139 |
tqdm.pandas(desc='Calculating Stability')
|
140 |
+
df = df.groupby(by=['r_history'], group_keys=False).progress_apply(cal_stability)
|
141 |
# print("Stability calculated.")
|
142 |
df.reset_index(drop=True, inplace=True)
|
143 |
df.drop_duplicates(inplace=True)
|
|
|
152 |
df['factor'] = round(df['stability'] / df['last_stability'], 2)
|
153 |
df = df[(df['i'] >= 2) & (df['group_cnt'] >= 100)]
|
154 |
df['last_recall'] = df['r_history'].map(lambda x: x[-1])
|
155 |
+
df = df[df.groupby(['i', 'r_history'], group_keys=False)['group_cnt'].transform(max) == df['group_cnt']]
|
156 |
df.to_csv(proj_dir / 'stability_for_analysis.tsv', sep='\t', index=None)
|
157 |
# print("1:again, 2:hard, 3:good, 4:easy\n")
|
158 |
# print(df[df['r_history'].str.contains(r'^[1-4][^124]*$', regex=True)][
|