diff --git a/llm_web_kit/extractor/html/recognizer/cc_math/render/mathjax.py b/llm_web_kit/extractor/html/recognizer/cc_math/render/mathjax.py index f10eda94..9dbd7e03 100644 --- a/llm_web_kit/extractor/html/recognizer/cc_math/render/mathjax.py +++ b/llm_web_kit/extractor/html/recognizer/cc_math/render/mathjax.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List from pylatexenc import latexwalker +from pylatexenc.latexwalker import LatexWalkerParseError from llm_web_kit.extractor.html.recognizer.cc_math.common import CCMATH from llm_web_kit.extractor.html.recognizer.cc_math.render.render import ( @@ -381,8 +382,14 @@ def _process_math_in_text( else: matches = [] tem_match_display = [] - walker = latexwalker.LatexWalker(text) - nodelist, pos, len_ = walker.get_latex_nodes(pos=0) + # 使用 tolerant_parsing=False 来严格解析,避免不完整的公式被错误识别 + walker = latexwalker.LatexWalker(text, tolerant_parsing=False) + try: + nodelist, pos, len_ = walker.get_latex_nodes(pos=0) + except LatexWalkerParseError: + # 解析失败说明文本中包含不完整的数学公式标记(如 $$$), + # 此时跳过 latexwalker 解析,只使用正则匹配 + nodelist = [] for node in nodelist: # 标准的数学环境 if node.isNodeType(latexwalker.LatexMathNode): diff --git a/tests/llm_web_kit/simple/assets/double_dollar.html b/tests/llm_web_kit/simple/assets/double_dollar.html new file mode 100644 index 00000000..f32dc92d --- /dev/null +++ b/tests/llm_web_kit/simple/assets/double_dollar.html @@ -0,0 +1,460 @@ + + + + + + +
+ + + +
+ + + +
+

Tag Info

+
+ +
+
+ +
+

Hot answers tagged

+ +
+
+
+
+ +
+ I believe the problem is that the steady state may not exist, and the system instead exhibits steady growth (depending on parameters). + +The reason is because the model is equivalent to the standard consumption-saving problem with exogenous and constant interest rate. To see that, first consider the first order condition for labor choice $f_2(k,\ell) = w$ (... +
+ +
+
+
+
+ +
+ Economists have been exploring control theory applications to macro economics for decades. For example, here is a 40 year-old research paper written in 1976 on the topic. + +top of page 2 (also numbered 171) +In the past decade, a number of engineers and economists have asked the question: "If modern control theory can improve the guidance of airplanes and ... +
+ +
+
+
+
+ +
+ I am posting this as an answer, because it continues on user @ivansml answer... which is the one that identified the catch here, a catch I naively have overlooked (although it is a narrow case, while the interesting part comes after. Nevertheless, it should have been dealt with). +Indeed, with exogenous wage rate, and perfectly competitive optimization on ... +
+ +
+
+
+
+ +
+ There is no clear right and wrong about this, it's just a matter of convenience. The current-value Hamiltonian is likely to be more convenient when the objective function includes a discount factor. Following Chiang (1), suppose the problem is: +$\qquad$Maximise $V = \int_0^T G(t,y,u)e^{-\rho t}$ +$\qquad$subject to $\dot y=f(t,y,u)$ +$\qquad$and boundary ... +
+ +
+
+
+
+ +
+ In this case you have two state variables, so as you say, you have to check the FOC ($J$ is the Hamiltonian) for $\frac{\partial J}{\partial K}=-\dot\lambda_1$ and $\frac{\partial J}{\partial H}=-\dot\lambda_2$ (besides the FOC for control variables). In this sense the logic is the same as Lagrangian (in static context), there are as many lagrangians, as ... +
+ +
+
+
+
+ +
+ I think that the key question is whether this firm is the only firm in the economy. If it is then it is no longer correct for it to take $w$ as given as $w$ will be affected by its own capital accumulation decision. In this case you should make the substitutions that you made before your equation (2) while setting up the Hamiltonian. On the other hand if ... +
+ +
+
+
+
+ +
+ Following Caputo, an Optimal Control problem is autonomous when none of the functions appearing in the description of the problem depends explicitly on the time-variable. But this means that the standard setup with an exogenous utility-discount factor is not autonomous, but it can be made so by redefining the multiplier(s) and then using the current-value ... +
+ +
+
+
+
+ +
+ What about rewriting the problem in the following way? + +$$\underset{\left\{ c_{t}\right\} }{max}\int_{t=0}^{\infty}\left[u\left(c_{t}\right) e^{y_t}\right]e^{-\rho t}dt$$ + +with the new state variables defined as + +$$\begin{align} +\dot{k_{t}}=f\left(k_{t}\right)-c_{t}\\ +\dot{y}_{t}=-h\left(k_{t}\right) +\end{align}$$ + +given the initial conditions $(k(0),y(0))=(... +
+ +
+
+
+
+ +
+ Not really an answer, but too long for comment. + +The $P$ in your +$$y(u,v)^*= \frac{v-P(y^*)}{P'(y^*)} +u$$ +expression from the insider's problem and the $P$ in the expression +$$ +\min_{P(\cdot)} \cdots +$$ +from the market maker's problem should not be the same. That is not the definition of rational expectation equilibrium in this context. + +The insider ... +
+ +
+
+
+
+ +
+ The OP's answer is correct in its conclusion, but he applies a strange argument at the end to arrive there. + +Applying brute-force differentiation, the present value Hamiltonian is + +$$\mathcal{H}=e^{-\triangle} U\left( c\right) ++\lambda _{1}^{}\left[ f(k)-c\right] +\lambda _{2}\left[ \rho ++h(k)\right] $$ + +and so + +$$\frac {d\mathcal{H}}{dt} = -\dot \... +
+ +
+
+
+
+ +
+ One general issue I see is that you try to include uncertainty in a framework developed for a deterministic setup. + +What you do is to use expected income in the equation of motion for human capital. Let $I_{a,t}$ denote the indicator function for attack, taking the value $1$ when there is an attack, and the value $0$ when there isn't. Then, properly, + +$$\dot{... +
+ +
+
+
+
+ +
+ A small addendum. Lars Peter Hansen and Thomas J. Sargent wrote the book "Robustness", which is an attempt to apply robust control to economics. They treated robust control from a game theoretical perspective. + +In general, economics uses optimal control theory, which was the state of the art in the 1960s. The state of the art in the 1990s control theory (when ... +
+ +
+
+
+
+ +
+ For a PID system to work, you need to be (at least approximately) correct about the relationship between the variables you are trying to manage. Unfortunately the relationships between macroeconomic variables as predicted by mainstream economics have such a poor correspondence with reality, that any PID system based on these theories is doomed to fail. +
+ +
+
+
+
+ +
+ I think I proved in a rigorous way that the system is autonomous for the model that I have written 3-4 days and I think it is useful for the community, especially for those who are working on macroeconomics of growth. + +Let's write the present-value Hamiltonian ; + +$$\mathcal{H}=e^{-\triangle}\left[ U\left( c\right)\right] ++\lambda _{2}^{}\left[ f(k)-c\right] +... +
+ +
+
+
+
+ +
+ Differentiating $(3)$ with respect to time we get + +$$u_{cc}\dot c = \dot \lambda \implies \frac {u_{cc}}{u_c} \dot c = \frac {\dot \lambda}{\lambda}$$ + +Inserting into $(6)$ we obtain + +$$\dot M = -\frac {\eta_M}{\eta_{MM}}\frac {u_{cc}}{u_c} \dot c$$ + +So the fixed point of $M$ will happen under the same conditions that the fixed point of $c$ will. Also, the ... +
+ +
+
+
+
+ +
+ After a comment exchange, let's provide the answer under discrete-time formulation. The problem is now written + +\begin{align} + &\max_{\{u\}_1^T, \{y\}_1^T}\sum^T_{t=1}{F(y_t,u_t)}\\ +\text{s.t.} \quad & y_{t+1}-y_t = f(y_t,u_t)\\ +& y_1 = \text{given}\\ +& y_{T+1}~~\text{free} +\end{align} + +where $y_{t+1}$ is the value of the stock variable ... +
+ +
+
+
+
+ +
+ I recommend the manuscript here by Lawrence Evans. The related example is described as 'Rocket Railroad Car'. The first instance is on pages 9-12 where a geometric solution is provided. The choice set is not constrained to a discrete set in principle but the first part shows optimality when only the highest and lowest actions are chosen. On pages 35-36, ... +
+ +
+
+
+
+ +
+ The differential equation + +$$\dot k = \frac{1}{\sigma} k^\alpha - \delta k$$ + +has the structure of a Bernoulli equation. We solve it by the following transformation steps: + +1) Multiply throughout by $k^{-\alpha}$: + +$$k^{-\alpha}\dot k = \frac{1}{\sigma} - \delta k^{1-\alpha} \tag{1}$$ + +2) Define the variable +$$z \equiv k^{1-\alpha} \implies \dot z = (1-\... +
+ +
+
+
+
+ +
+ Economists use optimal control both in microeconomics and in macroeconomics. +Your question is about economic policy in particular, but policy decisions can be guided both by micro and macro models. + +Macroeconomics + +Central banks do use both simple feedback loops (think Taylor rule) and optimal control analysis to guide decisions. There is this Federal ... +
+ +
+
+
+
+ +
+ Your value function is as follows: +$$ +V_t[w] = \max_{c_t \in[0,w]} \left\{u(c_t) + \frac{1}{2}V_{t+1}[\alpha(w_t - c_t)] + \frac{1}{2}V_{t+1}[\beta(w_t-c_t)] \right\} +$$ +with the terminal condition +$$ +V_{T}[w_T] = \max_{c_T \in [0,w_T]} u(c_T) +$$ + +So, we can solve this via backward induction. Clearly, at the final period $T$, since $u$ is monotonic, we ... +
+ +
+
+
+
+ +
+ Hamiltonian +\begin{align} +H(y(t),u(t),\lambda(t)) = y(t)+u(t)^2 + \lambda(t) u(t) +\end{align} + +First order conditions read +\begin{align} +&H_u = 0 \quad \Longleftrightarrow \quad u(t) = -\frac{\lambda(t)}{2}\\[2mm] +&\frac{d\lambda(t)}{dt} = -H_y = -1\\[2mm] +&\lambda(1) = 0 +\end{align} +Integrate costate +\begin{align} +&\int \frac{d\lambda(t)}{dt}... +
+ +
+
+
+
+ +
+ In my experience, it's mainly just for cleanliness for results. + +Consider an infinite horizon repeated game, with discounted payoff representation (where I use $\delta = (1-\lambda)$ in your notation) +$$ +(1-\delta)\sum_{t=0}^{\infty}\delta^t R_t +$$ +where $0 < \delta < 1$. + +Suppose I play a strategy that gives me the same payoff, say $a$, for each ... +
+ +
+
+
+
+ +
+ This question is too broad as it stands. There is a longer answer already, but I think it is possible to deal with a core part of the question easily. + +The concrete question is what is a state-space representation, if possible with some of the intuition, its uses in economics (control theory) + +My background is in control systems theory, where the notion of a ... +
+ +
+
+
+
+ +
+ Let's solve the differential equation (12). + +As a first step, we look for a "simpler" differential equation, namely (A.1). +(A.1) can be written +$$\frac{\dot{z}}{z}=-(1-\alpha)\left(\delta+\frac{\bar{x}e^{-\bar{x}t}}{e^{-\bar{x}t}+\bar{x}A}\right)$$ +On the left-hand side, you have the derivative of $\ln(z)$. You can integrate to obtain the form of any ... +
+ +
+
+
+
+ +
+ I would say that the main difference stems from the solution method, which results in your statement about all paths versus only the path at time t being true. + +Dynamic programming (at least when done numerically) consists of backward induction. One tries to identify the optimal action for all possible values of the state variable in the final period, and ... +
+ +
+
+
+
+ +
+ More of a comment: + +There should be an expectation operator in the statement of the problem, otherwise problem doesn't make sense. + +That "...the deterministic and stochastic value function must be the same..." is not quite right. The value of $\sigma^2$ is crucial in the restriction + +\begin{align} +\rho = \left(-n + \sigma^2\left(1 - \frac{\alpha\gamma}{2}\... +
+ +
+
+ +
+

Only top voted, non community-wiki answers of a minimum length are eligible

+ +
+ +
+ +
+
+ + + + + + \ No newline at end of file diff --git a/tests/llm_web_kit/simple/assets/triple_dollar.html b/tests/llm_web_kit/simple/assets/triple_dollar.html new file mode 100644 index 00000000..eb410644 --- /dev/null +++ b/tests/llm_web_kit/simple/assets/triple_dollar.html @@ -0,0 +1,8 @@ + + +
+ General Construction Job Board [$$$] Construction Estimator - Multi-Family - Blueprint Staffing, LLC December 07, 2023 at 05:59:10
Location: Virginia United States
We are looking for a detail-oriented and skilled Construction Estimator with 3+ years of experience estimating multi-family plumbing projects. Bid Evaluation for Correct Pricing and Plumbing Quality Project Analysis Project Support wit...
+ https://www.roadtechs.com/const/wwwboard/getpost.php?rec_nbr=835977 +
+ + \ No newline at end of file diff --git a/tests/llm_web_kit/simple/test_simple.py b/tests/llm_web_kit/simple/test_simple.py index 6e0abf96..442af4a2 100644 --- a/tests/llm_web_kit/simple/test_simple.py +++ b/tests/llm_web_kit/simple/test_simple.py @@ -642,6 +642,18 @@ def test_extract_main_html_with_script(self): self.assertIn('B. How does the TV advertising campaign initiated by IKEA overcome the entry barrier of high advertising expenditures?', md) self.assertIn('Johansson, J. K. (2006). Global marketing (4th edition ed.). New York: McGraw Hill Irwin.', md) + def test_extract_main_html_with_double_dollar(self): + """Test that HTML text containing only $$ is not misidentified as a math formula.""" + html_content = open(os.path.join(self.base_path, 'assets', 'double_dollar.html'), 'r').read() + md = extract_content_from_main_html(self.url, html_content) + self.assertIn(r'he present value Hamiltonian is $$\mathcal{H}=e^{-\triangle} U\left( c\right) +\lambda _{1}^{}\left[ f(k)-c\right] +\lambda _{2}\left[ \rho +h(k)\right] $$ and so $$\frac {d\mathcal{H}}{dt} = -\dot \...', md) + + def test_extract_main_html_with_triple_dollar(self): + """Test that HTML text containing only $$$ is not misidentified as a math formula.""" + html_content = open(os.path.join(self.base_path, 'assets', 'triple_dollar.html'), 'r').read() + md = extract_content_from_main_html(self.url, html_content) + self.assertIn(r'[$$$] Construction Estimator - Multi-Family - Blueprint Staffing, LLC December 07, 2023 at 05:59:10', md) + def test_extract_main_html_with_mathjax(self): """测试包含MathJax数学公式的HTML内容提取."""