æ¯æããŸã£ãŠããè«æ±æžã®Excelãã¡ã€ã«ã1ã€éããŠãäŒç€Ÿåãã³ããŒããŠãéé¡ãã³ããŒããŠãæ¥ä»ãã³ããŒããŠãå¥ã®ã·ãŒãã«è²Œãä»ããŠââããã100ãã¡ã€ã«ç¹°ãè¿ãããäžžäžæ¥ãæ¶ããŠããŸããŸãããð ãããã³ãããã¹ãæ··ããã°ãåŸããç¯äººæ¢ãã®æéãŸã§çºçããŸãã
ãããªâExcelæäœæ¥ã«ãŒãâãæã£ãŠãããã®ãPythonãpathlibã§ãã¡ã€ã«äžèЧãååŸããopenpyxlã§ã»ã«ãèªã¿åããpandasã§è¡šã«ãŸãšããŠãæåŸã«to_excel()ã§æžãåºãââãã£ãããã ãã®æµãã§ãäœçŸãã¡ã€ã«ã§ãæ°ç§ãæ°åç§ã§åŠçã§ããäžçãæã«å
¥ããŸãâš
ããã©ã«ãã®äžã®è«æ±æžãå šéšèªãã§ãæ¥ä»ã»äŒç€Ÿåã»éé¡ã®äžèŠ§è¡šãäœãããšããä»åã®ãŽãŒã«ããããªãå®ç§ãªã³ãŒããæžãããšãããåŠçãåè§£ããã®ãæåã®ã³ãã§ãã
2021幎ãã©ã«ãå
ã®å
šExcelãã¡ã€ã«ã®ãã¹ãååŸãããã®3ã¹ããããé çªã«å®è£ ããŠããã°ãè¿·åã«ãªããã«ãŽãŒã«ãžãã©ãçããŸãã
ãã¡ã€ã«ã®ãã¹æäœã«ã¯æšæºã©ã€ãã©ãªã®pathlibã䟿å©ã§ããglob()ã§ã¯ã€ã«ãã«ãŒãæå®ããã°ãæ¡åŒµå.xlsxã®ãã¡ã€ã«ã ããäžæ°ã«éããããŸãã
from pathlib import Path
def main():
folder = Path(r"C:\work\2021幎")
for file_path in folder.glob("*.xlsx"):
print(file_path)
if __name__ == "__main__":
main() 絶察ãã¹ã§æå®ããŠããã°ãPythonã¹ã¯ãªãããã©ãã«çœ®ããŠãå®å®ããŠåããŸããforã«ãŒãã§1ãã¡ã€ã«ãã€åãåºããŠã次ã®ã¹ãããã«æž¡ããŠãããŸãããã
Excelæäœã®å®çªã©ã€ãã©ãªopenpyxlã䜿ã£ãŠãã¯ãŒã¯ããã¯ãèªã¿èŸŒã¿ãŸããè«æ±æžãã©ãŒããããçµ±äžãããŠãããªããã»ã«çªå°ãçŽæ¥æå®ããã®ããã¡ã°ãã·ã³ãã«ã
from openpyxl import load_workbook
wb = load_workbook(file_path, data_only=True)
ws = wb.worksheets[0]
invoice_date = ws["H4"].value # æ¥ä»
invoice_company = ws["A7"].value # è«æ±å
äŒç€Ÿå
invoice_amount = ws["E13"].value # åèšéé¡ ããã§éèŠãªã®ãdata_only=Trueã®ãªãã·ã§ã³ããããä»ããªããšãåèšéé¡ã®ã»ã«ã=SUM(...)ã®ãããªæ°åŒã®æååãšããŠè¿ã£ãŠããŠããŸããŸãããæ°åŒã®çµæïŒèšç®åŸã®å€ããæ¬²ãããšãã¯å¿
ãæå®ããŸãããã
ã»ã«ããèªã¿åã£ãæ¥ä»ã¯datetime.datetimeåã§æå»ãŸã§å«ãŸããŠããŸããäžèŠ§è¡šã«æå»ãŸã§åºããšèŠã¥ããã®ã§ãdatetime.dateã«å€æããŠãããšçŸããä»äžããã«ã
from datetime import date
invoice_date = date(invoice_date.year, invoice_date.month, invoice_date.day) 1ãã¡ã€ã«åã®èªã¿åãåŠçã¯ã颿°ãšããŠç¬ç«ãããŠãããšã«ãŒãéšåããã£ãšã¹ãããªããŸããåãã³ãïŒtype annotationïŒãä»ããŠãããšãåŸããèŠè¿ãããšãã®çè§£åºŠãæ®µéãã§ãã
from pathlib import Path
from datetime import date
from openpyxl import load_workbook
def get_invoice_data(file_path: Path) -> tuple[date, str, int]:
"""è«æ±æžExcelããæ¥ä»ã»äŒç€Ÿåã»éé¡ãæœåºãã"""
wb = load_workbook(file_path, data_only=True)
ws = wb.worksheets[0]
raw_date = ws["H4"].value
invoice_date = date(raw_date.year, raw_date.month, raw_date.day)
invoice_company = ws["A7"].value
invoice_amount = ws["E13"].value
return invoice_date, invoice_company, invoice_amount 衚圢åŒã®ããŒã¿ã¯pandasã®DataFrameã«ä»»ããã®ãæçã«ãŒããã«ãŒãã®äžã§1è¡ãã€è¿œå ããŠãããæåŸã«to_excel()ã§ãã¡ã€ã«ã«æžãåºããŸãã
import pandas as pd
from pathlib import Path
def main():
folder = Path(r"C:\work\2021幎")
df = pd.DataFrame(columns=["æ¥ä»", "è«æ±å
", "éé¡"])
for file_path in folder.glob("*.xlsx"):
invoice_date, invoice_company, invoice_amount = get_invoice_data(file_path)
new_row = pd.DataFrame([{
"æ¥ä»": invoice_date,
"è«æ±å
": invoice_company,
"éé¡": invoice_amount,
}])
df = pd.concat([df, new_row], ignore_index=True)
df.to_excel("2021å¹Žè«æ±æžäžèЧ.xlsx", index=False)
if __name__ == "__main__":
main() ignore_index=Trueãä»ããã®ããã€ã³ãããããå¿ãããšè¡çªå·ãéè€ããŠãšã©ãŒã«ãªããŸããåºåãããExcelãã¡ã€ã«ãéãã°ãæ¥ä»ã»è«æ±å
ã»éé¡ããããã«äžŠãã äžèŠ§è¡šãã§ãããã£ãŠããã¯ãã§ãð
ð¡ äœè£ãåºãŠãããããã©ã«ããååšããªããšãã®äŸå€åŠçããåŠçç¶æ³ãæ®ããã°åºåã远å ããŠã¿ãŸããããå®åã§äœ¿ãã¹ã¯ãªããã¯ãããŸã§äœã蟌ããšäžæ°ã«å ç¢ã«ãªããŸãã
åãä»çµã¿ãå°ãæ¡åŒµããã ãã§ãæ¥åæ¹åã®ã€ã³ãã¯ãã¯ã©ãã©ãåºãããŸãã
rglob("*.xlsx")ã§ãµããã©ã«ããŸã§ååž°çã«èµ°æ»pandasã®groupby()ã§åèšãäžçºç®åºãåãããªããèŠãããããšãæ¥åèªååã®ãã¡ã°ãã®è¿éãæå ã®ç°å¢ãšææãæŽããŠãããã ãã§ãåŠç¿ã¹ããŒãã¯ãã£ãšäžãããŸããå®éã«æãåããæéãå¢ãããã¢ã€ãã ãå³éžããŠç޹ä»ããŸãðª
openpyxlãpandasã䜿ã£ãExcelæäœããæ¥åã·ããªãªããŒã¹ã§äžå¯§ã«è§£èª¬ããŠãããå
¥éæžãåç»ã§åŸãç¥èãäœç³»ãšããŠåºããã®ã«æé©ã§ãã
Excelã ãã§ãªããPDFã»ã¡ãŒã«ã»Webã»ç»ååŠçãŸã§âé¢åãªäœæ¥âã®èªååãã¯ããã¯ãæºèŒãæå ã«1åãããšããµãšããæ¥åæ¹åã®ã¢ã€ãã¢ã湯氎ã®ããã«æ¹§ããŠããŸãã
å·Šã«ã³ãŒããå³ã«Excelãã¡ã€ã«ãå ¬åŒããã¥ã¡ã³ãã䞊ã¹ããšãèªååã¹ã¯ãªããäœæã®å¹çãåçã«ã¢ãããUSB-Cäžæ¬ã§æ¥ç¶ã§ããã¢ãã€ã«ã¢ãã¿ãŒã¯ãæºãçããŠãæ°è»œã«2ç»é¢åã§ããé Œããçžæ£ã§ãã
èªååã¹ã¯ãªããã¯ãã©ã€ïŒãšã©ãŒã®é£ç¶ãæãžã®è² æ ã軜ãããŠãããéé³èµ€è»žã®ã¡ã«ãã«ã«ããŒããŒããããã°ãéäžæéã䌞ã³ãŠãã°è§£æ±ºã®ã¹ããŒããäžãããŸãã
ãã©ã«ãå ã®å€§éExcelãã¡ã€ã«ãç®èŠã§ç¢ºèªããå Žé¢ã§ã¯ãé·æé䜿ã£ãŠãç²ãã«ãããã©ãã¯ããŒã«ãå§åçã«å¿«é©ãæéŠã®è² æ ãæžãããã®ã§ãèªååã¹ã¯ãªããã®ãããã°äœæ¥ãã¯ãã©ããŸãã
ã»ã«åäœã§ã®èªã¿æžããæžåŒèšå®ã¯openpyxlãåŸæãäžæ¹ã衚ãšããŠäžæ¬ã§èªã¿èŸŒãã§éèšããããªãpandasã®read_excel()ã䟿å©ã§ããä»åã®ããã«ãã©ãŒãããã®æ±ºãŸã£ãã»ã«ãããã³ãã€ã³ãã§å€ãåãåºããªãopenpyxlãååŸããçµæã衚ã«ãŸãšããŠåºåãããªãpandasããšåœ¹å²ãåããã®ãããããã§ãã
load_workbook()ã«data_only=Trueãæå®ããŠãã ããããããä»ãããšæ°åŒã§ã¯ãªãèšç®çµæã®å€ãè¿ã£ãŠããŸãããã ããExcelã§äžåºŠãä¿åããŠããªãç¶æ
ã ãšãã£ãã·ã¥ã空ãªã®ã§ã察象ãã¡ã€ã«ãäžåºŠExcelã§éããŠä¿åããŠããå¿
èŠããããŸãã
concurrent.futuresã®ThreadPoolExecutorã䜿ã£ãŠäžŠè¡åŠçããã®ã广çã§ããExcelãã¡ã€ã«èªã¿èŸŒã¿ã¯I/OããŠã³ããªåŠçãªã®ã§ããã«ãã¹ã¬ããã§äœæé床ã倧ããåäžããŸããäœçŸãã¡ã€ã«ã¬ãã«ã«ãªã£ããæ€èšããŠã¿ãŸãããã
ã»ã«äœçœ®ãçŽæ¥æå®ããã®ã§ã¯ãªãããè«æ±éé¡ããšããèŠåºãã»ã«ãæ¢ããŠããé£ã®ã»ã«ãèªããªã©ãããŒã¯ãŒãæ€çŽ¢ããŒã¹ã«åãæ¿ãããšå
ç¢ã«ãªããŸããtry/exceptã§åå¥ãã¡ã€ã«ã®ãšã©ãŒããã£ãããããã°ã«æ®ãã€ã€åŠçãç¶ç¶ãããäœãã«ãããšå®åã§ãå®å¿ã§ãã
ãpathlibã§ãã¡ã€ã«æäœ â openpyxlã§Excelèªã¿æžã â pandasã§ããŒã¿æŽåœ¢ â äŸå€åŠçïŒãã°åºå â ã¹ã±ãžã¥ãŒã«å®è¡ãã®é ãããããã1ã¹ããããã€èªåã®æ¥åã«åœãŠã¯ããŠäœã£ãŠãããšãæžç±ã§åŠãã ç¥èãâ䜿ããã¹ãã«âãšããŠå®çããŠãããŸãã
Excelã®ç¹°ãè¿ãäœæ¥ã¯ãèªååãããã®ç¬éãããæ¥æã忥æãæéãæµ®ãããšããè€å©ã®ãªã¿ãŒã³ãè¿ã£ãŠããŸãã仿¥ç޹ä»ããããã«ãåŠçã3ã¹ãããã«åè§£ããŠã颿°ã«åãåºããŠãªãã¡ã¯ã¿ãªã³ã°ââãã®äžé£ã®æµãã身ã«ã€ãã°ãè«æ±æžã ãã§ãªãèŠç©æžã»å£²äžéèšã»åšåº«ç®¡çãªã©ãããããæ¥åã«å¿çšã§ããããã«ãªããŸãðŒ
ãŸãã¯å°ããªãã©ã«ãã§è©ŠããŠããããã ãèªååã§ããïŒããšããæåäœéšãç©ãã§ã¿ãŠãã ããð£ ãã®ããã§æžç±ãããã€ã¹ç°å¢ãžã®æè³ãçµã¿åãããã°ãåŠç¿ã¹ããŒãã¯ããã«å éããŸãã仿¥ç޹ä»ããã¢ã€ãã ããããªãã®âè±ã»Excelæäœæ¥âã©ã€ããåŸæŒãããé Œããçžæ£ã«ãªããŸãããã«ïŒ
â ïž Windowsã»ãã¥ãªã…
ð§ çªç¶ã®æçš¿ã«ãã¡ã³éšç¶ïŒã©…
ð§© ããºã«ãè§£ãå¿«æãã³ãŒãã§…
âš ããã°ã©ã ã®åäœã軜ããã…
ð ãµãŒããŒãµã€ãéçºã®äžçãž…
âš é¢åãªWebäœæ¥ãèªååã…