Merge pull request #3 from thobiast/dev

thobiast · web-flow · commit c3ce06c16b1a · 2025-08-19T09:04:11.000-03:00
Dev
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,3 @@
 requests
 pandas
+lxml
diff --git a/src/magicformulabr/__init__.py b/src/magicformulabr/__init__.py
@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
 """magicformulabr module."""
 
-__version__ = "0.2.0"
+__version__ = "0.2.1"
diff --git a/src/magicformulabr/main.py b/src/magicformulabr/main.py
@@ -21,6 +21,8 @@
 import requests
 
 URL = "http://fundamentus.com.br/resultado.php"
+CACHE_FILE = "data_cache.json"
+CACHE_DURATION_SECONDS = 86400  # 24 hours
 
 
 ##############################################################################
@@ -40,6 +42,7 @@ def parse_parameters():
         %(prog)s -v
         %(prog)s -vv
         %(prog)s -m 3 -vv
+        %(prog)s -m 3 --top 30 --force-update
     """
     parser = argparse.ArgumentParser(
         description="Gera rank de acoes usando a magic formula",
@@ -101,8 +104,8 @@ class DataSourceHandler:
     def __init__(
         self,
         url,
-        cache_file="data_cache.json",
-        cache_duration=86400,
+        cache_file=CACHE_FILE,
+        cache_duration=CACHE_DURATION_SECONDS,
         force_update=False,
     ):
         """
@@ -226,15 +229,15 @@ class MagicFormula:
     # MAGIC_METHOD_FIELD is a dictionary mapping method identifiers to their respective
     # financial metrics used in the Magic Formula calculation. Each method identifier
     # corresponds to a different set of financial metrics:
-    #   - "1": Uses 'P/L' for earnings yield and 'ROE' for return on capital.
-    #   - "2": Uses 'EV/EBIT' for earnings yield and 'ROIC' for return on capital.
-    #   - "3": Uses 'EV/EBITDA' for earnings yield and 'ROIC' for return on capital.
+    #   - 1: Uses 'P/L' for earnings yield and 'ROE' for return on capital.
+    #   - 2: Uses 'EV/EBIT' for earnings yield and 'ROIC' for return on capital.
+    #   - 3: Uses 'EV/EBITDA' for earnings yield and 'ROIC' for return on capital.
     # These mappings allow for flexibility in defining which financial metrics are used in
     # the calculationof the Magic Formula, accommodating different variations of the formula.
     MAGIC_METHOD_FIELD = {
-        "1": {"earnings yield": "P/L", "return on capital": "ROE"},
-        "2": {"earnings yield": "EV/EBIT", "return on capital": "ROIC"},
-        "3": {"earnings yield": "EV/EBITDA", "return on capital": "ROIC"},
+        1: {"earnings yield": "P/L", "return on capital": "ROE"},
+        2: {"earnings yield": "EV/EBIT", "return on capital": "ROIC"},
+        3: {"earnings yield": "EV/EBITDA", "return on capital": "ROIC"},
     }
 
     def __init__(self, pd_df, magic_method):
@@ -244,7 +247,7 @@ def __init__(self, pd_df, magic_method):
 
         Parameters:
             pd_df (pandas.DataFrame): The financial data of companies.
-            magic_method (str): Specifies the method used for calculating the Magic Formula ranking.
+            magic_method (int): Specifies the method used for calculating the Magic Formula ranking.
                                  It determines which financial metrics are used for earnings yield
                                  and return on capital calculations.
         """
@@ -261,7 +264,7 @@ def pct_to_float(number):
         """
         return float(number.strip("%").replace(".", "").replace(",", "."))
 
-    def apply_converters(self):
+    def _apply_converters(self):
         """
         Converts specific columns in `pd_df` to the necessary data type or format.
         """
@@ -284,7 +287,7 @@ def _remove_rows(self, col_name, min_value):
         logging.debug(self.pd_df.loc[tickers])
         self.pd_df.drop(tickers, inplace=True)
 
-    def filter_data(self):
+    def _filter_data(self):
         """
         Removes rows in `pd_df` with negative or undesirable values for key financial metrics.
 
@@ -294,42 +297,16 @@ def filter_data(self):
         self._remove_rows(self.ret_on_capital, 0)
         self._remove_rows("Liq.2meses", 0)
 
-    def drop_unneeded_columns(self, level):
-        """
-        Removes unnecessary columns from `pd_df` based on the specified verbosity level.
-
-        Parameters:
-            level (int): The verbosity level that determines which columns are retained.
-                        Higher levels retain more columns.
-        """
-        df_columns = self.pd_df.columns.tolist()
-        if level == 0:
-            keep_cols = [self.earnings_yield, self.ret_on_capital]
-        elif level == 1:
-            keep_cols = [
-                "Cotação",
-                "Div.Yield",
-                "ROIC",
-                "ROE",
-                "P/L",
-                "EV/EBIT",
-                "EV/EBITDA",
-                self.earnings_yield,
-                self.ret_on_capital,
-            ]
-        else:
-            return
-
-        remove_cols = [x for x in df_columns if x not in keep_cols]
-        self.pd_df.drop(remove_cols, axis="columns", inplace=True)
-
-    def calc_rank(self):
+    def _calculate_rank(self):
         """
         Calculates the Magic Formula ranking for companies in `pd_df`.
 
         Adds ranking columns to `pd_df` based on the earnings yield and return on capital,
         then calculates a final rank.
         """
+        if self.pd_df.empty:
+            return self.pd_df
+
         self.pd_df["Rank_earnings_yield"] = self.pd_df[self.earnings_yield].rank(
             ascending=True, method="min"
         )
@@ -341,16 +318,62 @@ def calc_rank(self):
         )
         self.pd_df.sort_values(by="Rank_Final", ascending=True, inplace=True)
 
-    def show_rank(self, top):
-        """
-        Displays the top-ranked companies.
+        return self.pd_df
 
-        Parameters:
-            top (int): The number of top-ranked companies to display.
+    def process(self):
+        """
+        Executes the full Magic Formula process: conversion, filtering, and ranking.
         """
-        self.pd_df.reset_index(inplace=True)
-        self.pd_df.index = self.pd_df.index + 1
-        print(self.pd_df.head(top).to_string())
+        self._apply_converters()
+        self._filter_data()
+        df_ranked = self._calculate_rank()
+        return df_ranked
+
+
+def display_results(df, args):
+    """
+    Displays the top-ranked companies according to verbosity.
+
+    Parameters:
+        df (pd.DataFrame):
+            The ranked DataFrame produced by `MagicFormula.calc_rank()`.
+        args (argparse.Namespace):
+            Parsed command-line arguments containing:
+                - method (int): Magic Formula method (1, 2, or 3) to determine key columns.
+                - verbose (int): Verbosity level (0, 1, or >=2) to control displayed columns.
+                - top (int): Number of companies to display.
+    """
+    df_display = df.copy()
+
+    base_cols = ["Rank_earnings_yield", "Rank_return_on_capital", "Rank_Final"]
+
+    magic_method_cols = MagicFormula.MAGIC_METHOD_FIELD[args.method]
+    earnings_yield_col = magic_method_cols["earnings yield"]
+    return_on_capital_col = magic_method_cols["return on capital"]
+
+    if args.verbose == 0:
+        keep_cols = [earnings_yield_col, return_on_capital_col] + base_cols
+    elif args.verbose == 1:
+        keep_cols = [
+            "Cotação",
+            "Div.Yield",
+            "ROIC",
+            "ROE",
+            "P/L",
+            "EV/EBIT",
+            "EV/EBITDA",
+            earnings_yield_col,
+            return_on_capital_col,
+        ] + base_cols
+    else:
+        keep_cols = df_display.columns.tolist()
+
+    # Remove duplicates preserving order
+    keep_cols = list(dict.fromkeys(keep_cols))
+
+    df_display.reset_index(inplace=True, names="Ticker")
+    df_display.index = df_display.index + 1
+    print(df_display.head(args.top).to_string(columns=["Ticker"] + keep_cols))
 
 
 ##############################################################################
@@ -370,12 +393,12 @@ def main():
     data_handler = DataSourceHandler(URL, force_update=args.force_update)
     pd_df = data_handler.get_data()
 
-    magicformula = MagicFormula(pd_df, str(args.method))
-    magicformula.apply_converters()
-    magicformula.filter_data()
-    magicformula.drop_unneeded_columns(level=args.verbose)
-    magicformula.calc_rank()
-    magicformula.show_rank(args.top)
+    magicformula = MagicFormula(pd_df, args.method)
+    ranked_df = magicformula.process()
+    if ranked_df.empty:
+        print("No companies passed the filtering criteria. The ranking is empty.")
+    else:
+        display_results(ranked_df, args)
 
 
 ##############################################################################
diff --git a/tests/test_magicformula_calc_rank.py b/tests/test_magicformula_calc_rank.py
@@ -7,15 +7,15 @@
 from magicformulabr.main import MagicFormula
 
 
-@pytest.fixture(scope="function")
-def sample_df():
+@pytest.fixture(name="sample_df", scope="function")
+def _sample_df():
     """Create fake dataframe."""
     dic_com = {"EV/EBIT": [0.2, 8, 4, 2], "ROIC": [10, 60, 40, 90]}
     my_df = pd.DataFrame(data=dic_com, index=["AAAA3", "BBBB3", "CCCC3", "DDDD4"])
     return my_df
 
 
-def test_calc_rank_method_2(sample_df):  # pylint: disable=redefined-outer-name
+def test_calc_rank_method_2(sample_df):
     """Test calc_rank method."""
     expected_result = """\
        EV/EBIT  ROIC  Rank_earnings_yield  Rank_return_on_capital  Rank_Final
@@ -24,6 +24,6 @@ def test_calc_rank_method_2(sample_df):  # pylint: disable=redefined-outer-name
 BBBB3      8.0    60                  4.0                     2.0         6.0
 CCCC3      4.0    40                  3.0                     3.0         6.0"""
 
-    magic_formula = MagicFormula(sample_df, magic_method="2")
-    magic_formula.calc_rank()
-    assert magic_formula.pd_df.to_string() == expected_result
+    magic_formula = MagicFormula(sample_df, magic_method=2)
+    pd_df = magic_formula._calculate_rank()  # pylint: disable=protected-access
+    assert pd_df.to_string() == expected_result